Commit 0f6e985f authored by Akiva Goldberger, committed by Zhengchao Shao
Browse files

net/mlx5: Add a timeout to acquire the command queue semaphore

mainline inclusion
from mainline-v6.10-rc1
commit 485d65e1357123a697c591a5aeb773994b247ad7
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IA6S89
CVE: CVE-2024-38556

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=485d65e1357123a697c591a5aeb773994b247ad7



--------------------------------

Prevent forced completion handling on an entry that has not yet been
assigned an index, which causes an out-of-bounds access on idx = -22.
Instead of waiting indefinitely for the sem, the blocking flow now waits
for an index to be allocated or for a sem acquisition timeout before
starting the timer for FW completion.

Kernel log example:
mlx5_core 0000:06:00.0: wait_func_handle_exec_timeout:1128:(pid 185911): cmd[-22]: CREATE_UCTX(0xa04) No done completion

Fixes: 8e715cd6 ("net/mlx5: Set command entry semaphore up once got index free")
Signed-off-by: Akiva Goldberger <agoldberger@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240509112951.590184-5-tariqt@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Conflicts:
	drivers/net/ethernet/mellanox/mlx5/core/cmd.c
[The conflict occurs because commits 5945e1ad ("net/mlx5: Read
timeout values from init segment"), 58db7286 ("net/mlx5:
Re-organize mlx5_cmd struct"), 8f5100da56b3 ("net/mlx5e: Fix a
race in command alloc flow"), 7cb5eb93 ("net/mlx5: Introduce
and use opcode getter in command interface") and f0864701 ("net/mlx5:
cmdif, Return value improvements") are not merged. MLX5_CMD_TIMEOUT_MSEC
is used as the timeout and cannot be configured by users. struct mlx5_cmd
does not contain the var member, so sem is referenced directly.]
Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
parent 5bde6558
Loading
Loading
Loading
Loading
+29 −7
Original line number Diff line number Diff line
@@ -900,18 +900,30 @@ static void cmd_work_handler(struct work_struct *work)
	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
	struct mlx5_cmd *cmd = ent->cmd;
	struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
	unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
	unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
	struct mlx5_cmd_layout *lay;
	struct semaphore *sem;
	unsigned long flags;
	bool poll_cmd = ent->polling;
	int alloc_ret;
	int cmd_mode;

	complete(&ent->handling);
	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
	down(sem);
	if (!ent->page_queue) {
		if (down_timeout(&cmd->sem, timeout)) {
			mlx5_core_warn(dev, "%s(0x%x) timed out while waiting for a slot.\n",
				       mlx5_command_str(ent->op), ent->op);
			if (ent->callback) {
				ent->callback(-EBUSY, ent->context);
				mlx5_free_cmd_msg(dev, ent->out);
				free_msg(dev, ent->in);
				cmd_ent_put(ent);
			} else {
				ent->ret = -EBUSY;
				complete(&ent->done);
			}
			complete(&ent->slotted);
			return;
		}
		alloc_ret = cmd_alloc_index(cmd);
		if (alloc_ret < 0) {
			mlx5_core_err_rl(dev, "failed to allocate command entry\n");
@@ -924,11 +936,12 @@ static void cmd_work_handler(struct work_struct *work)
				ent->ret = -EAGAIN;
				complete(&ent->done);
			}
			up(sem);
			up(&cmd->sem);
			return;
		}
		ent->idx = alloc_ret;
	} else {
		down(&cmd->pages_sem);
		ent->idx = cmd->max_reg_cmds;
		spin_lock_irqsave(&cmd->alloc_lock, flags);
		clear_bit(ent->idx, &cmd->bitmask);
@@ -936,6 +949,8 @@ static void cmd_work_handler(struct work_struct *work)
	}

	cmd->ent_arr[ent->idx] = ent;

	complete(&ent->slotted);
	lay = get_inst(cmd, ent->idx);
	ent->lay = lay;
	memset(lay, 0, sizeof(*lay));
@@ -955,7 +970,7 @@ static void cmd_work_handler(struct work_struct *work)
	ent->ts1 = ktime_get_ns();
	cmd_mode = cmd->mode;

	if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
	if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, timeout))
		cmd_ent_get(ent);
	set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);

@@ -1056,6 +1071,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
		ent->ret = -ECANCELED;
		goto out_err;
	}

	wait_for_completion(&ent->slotted);

	if (cmd->mode == CMD_MODE_POLLING || ent->polling)
		wait_for_completion(&ent->done);
	else if (!wait_for_completion_timeout(&ent->done, timeout))
@@ -1072,6 +1090,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
		mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
			       mlx5_command_str(msg_to_opcode(ent->in)),
			       msg_to_opcode(ent->in));
	} else if (err == -EBUSY) {
		mlx5_core_warn(dev, "%s(0x%x) timeout while waiting for command semaphore.\n",
			       mlx5_command_str(ent->op), ent->op);
	}
	mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
		      err, deliv_status_to_str(ent->status), ent->status);
@@ -1113,6 +1134,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
	ent->polling = force_polling;

	init_completion(&ent->handling);
	init_completion(&ent->slotted);
	if (!callback)
		init_completion(&ent->done);

@@ -1130,7 +1152,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
		goto out; /* mlx5_cmd_comp_handler() will put(ent) */

	err = wait_func(dev, ent);
	if (err == -ETIMEDOUT || err == -ECANCELED)
	if (err == -ETIMEDOUT || err == -ECANCELED || err == -EBUSY)
		goto out_free;

	ds = ent->ts2 - ent->ts1;
+1 −0
Original line number Diff line number Diff line
@@ -756,6 +756,7 @@ struct mlx5_cmd_work_ent {
	void		       *context;
	int			idx;
	struct completion	handling;
	struct completion	slotted;
	struct completion	done;
	struct mlx5_cmd        *cmd;
	struct work_struct	work;