Commit 9945a67e authored by Yishai Hadas's avatar Yishai Hadas Committed by Alex Williamson
Browse files

vfio/mlx5: Refactor PD usage



This patch refactors PD usage so that the PD's life cycle matches that of
the migration file, instead of being allocated/destroyed upon each
SAVE/LOAD command.

This is a preparation step towards the PRE_COPY series where multiple
images will be SAVED/LOADED and a single PD can be simply reused.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-5-yishaih@nvidia.com


Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 0e7caa65
Loading
Loading
Loading
Loading
+35 −18
Original line number Original line Diff line number Diff line
@@ -279,7 +279,6 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)


	mlx5_core_destroy_mkey(mdev, async_data->mkey);
	mlx5_core_destroy_mkey(mdev, async_data->mkey);
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
	mlx5_core_dealloc_pd(mdev, async_data->pdn);
	kvfree(async_data->out);
	kvfree(async_data->out);
	complete(&migf->save_comp);
	complete(&migf->save_comp);
	fput(migf->filp);
	fput(migf->filp);
@@ -314,7 +313,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	struct mlx5vf_async_data *async_data;
	struct mlx5vf_async_data *async_data;
	struct mlx5_core_dev *mdev;
	struct mlx5_core_dev *mdev;
	u32 pdn, mkey;
	u32 mkey;
	int err;
	int err;


	lockdep_assert_held(&mvdev->state_mutex);
	lockdep_assert_held(&mvdev->state_mutex);
@@ -326,16 +325,12 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
	if (err)
	if (err)
		return err;
		return err;


	err = mlx5_core_alloc_pd(mdev, &pdn);
	if (err)
		return err;

	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
			      0);
			      0);
	if (err)
	if (err)
		goto err_dma_map;
		goto err_dma_map;


	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
	err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey);
	if (err)
	if (err)
		goto err_create_mkey;
		goto err_create_mkey;


@@ -357,7 +352,6 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
	migf->total_length = 0;
	migf->total_length = 0;
	get_file(migf->filp);
	get_file(migf->filp);
	async_data->mkey = mkey;
	async_data->mkey = mkey;
	async_data->pdn = pdn;
	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
			       async_data->out,
			       async_data->out,
			       out_size, mlx5vf_save_callback,
			       out_size, mlx5vf_save_callback,
@@ -375,7 +369,6 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
err_create_mkey:
err_create_mkey:
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
err_dma_map:
	mlx5_core_dealloc_pd(mdev, pdn);
	complete(&migf->save_comp);
	complete(&migf->save_comp);
	return err;
	return err;
}
}
@@ -386,7 +379,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
	struct mlx5_core_dev *mdev;
	struct mlx5_core_dev *mdev;
	u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {};
	u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {};
	u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {};
	u32 pdn, mkey;
	u32 mkey;
	int err;
	int err;


	lockdep_assert_held(&mvdev->state_mutex);
	lockdep_assert_held(&mvdev->state_mutex);
@@ -400,15 +393,11 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
	}
	}


	mdev = mvdev->mdev;
	mdev = mvdev->mdev;
	err = mlx5_core_alloc_pd(mdev, &pdn);
	if (err)
		goto end;

	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
	if (err)
	if (err)
		goto err_reg;
		goto end;


	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
	err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey);
	if (err)
	if (err)
		goto err_mkey;
		goto err_mkey;


@@ -424,13 +413,41 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
	mlx5_core_destroy_mkey(mdev, mkey);
	mlx5_core_destroy_mkey(mdev, mkey);
err_mkey:
err_mkey:
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
err_reg:
	mlx5_core_dealloc_pd(mdev, pdn);
end:
end:
	mutex_unlock(&migf->lock);
	mutex_unlock(&migf->lock);
	return err;
	return err;
}
}


/*
 * Allocate a protection domain (PD) whose lifetime is tied to the
 * migration file (stored in migf->pdn), so it can be reused across
 * multiple SAVE/LOAD commands instead of per-command alloc/dealloc.
 *
 * Must be called with mvdev->state_mutex held.
 * Returns 0 on success, -ENOTCONN if the mlx5 core device has been
 * detached, or the error from mlx5_core_alloc_pd().
 */
int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf)
{
	lockdep_assert_held(&migf->mvdev->state_mutex);
	if (migf->mvdev->mdev_detach)
		return -ENOTCONN;

	/* Return the firmware command result directly; no local needed. */
	return mlx5_core_alloc_pd(migf->mvdev->mdev, &migf->pdn);
}

void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf)
{
	lockdep_assert_held(&migf->mvdev->state_mutex);
	if (migf->mvdev->mdev_detach)
		return;

	mlx5_core_dealloc_pd(migf->mvdev->mdev, migf->pdn);
}

void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
{
	lockdep_assert_held(&migf->mvdev->state_mutex);

	WARN_ON(migf->mvdev->mdev_detach);

	mlx5vf_cmd_dealloc_pd(migf);
}

static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes,
static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes,
			   u32 req_nodes)
			   u32 req_nodes)
{
{
+4 −1
Original line number Original line Diff line number Diff line
@@ -16,7 +16,6 @@ struct mlx5vf_async_data {
	struct mlx5_async_work cb_work;
	struct mlx5_async_work cb_work;
	struct work_struct work;
	struct work_struct work;
	int status;
	int status;
	u32 pdn;
	u32 mkey;
	u32 mkey;
	void *out;
	void *out;
};
};
@@ -27,6 +26,7 @@ struct mlx5_vf_migration_file {
	u8 disabled:1;
	u8 disabled:1;
	u8 is_err:1;
	u8 is_err:1;


	u32 pdn;
	struct sg_append_table table;
	struct sg_append_table table;
	size_t total_length;
	size_t total_length;
	size_t allocated_length;
	size_t allocated_length;
@@ -127,6 +127,9 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf);
			       struct mlx5_vf_migration_file *migf);
int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf);
			       struct mlx5_vf_migration_file *migf);
int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf);
void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf);
void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf);
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
+32 −12
Original line number Original line Diff line number Diff line
@@ -236,12 +236,15 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
	migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf,
	migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf,
					O_RDONLY);
					O_RDONLY);
	if (IS_ERR(migf->filp)) {
	if (IS_ERR(migf->filp)) {
		int err = PTR_ERR(migf->filp);
		ret = PTR_ERR(migf->filp);

		goto end;
		kfree(migf);
		return ERR_PTR(err);
	}
	}


	migf->mvdev = mvdev;
	ret = mlx5vf_cmd_alloc_pd(migf);
	if (ret)
		goto out_free;

	stream_open(migf->filp->f_inode, migf->filp);
	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	mutex_init(&migf->lock);
	init_waitqueue_head(&migf->poll_wait);
	init_waitqueue_head(&migf->poll_wait);
@@ -257,20 +260,25 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
						    &migf->total_length);
						    &migf->total_length);
	if (ret)
	if (ret)
		goto out_free;
		goto out_pd;


	ret = mlx5vf_add_migration_pages(
	ret = mlx5vf_add_migration_pages(
		migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE));
		migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE));
	if (ret)
	if (ret)
		goto out_free;
		goto out_pd;


	migf->mvdev = mvdev;
	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
	if (ret)
	if (ret)
		goto out_free;
		goto out_save;
	return migf;
	return migf;
out_save:
	mlx5vf_disable_fd(migf);
out_pd:
	mlx5vf_cmd_dealloc_pd(migf);
out_free:
out_free:
	fput(migf->filp);
	fput(migf->filp);
end:
	kfree(migf);
	return ERR_PTR(ret);
	return ERR_PTR(ret);
}
}


@@ -352,6 +360,7 @@ static struct mlx5_vf_migration_file *
mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
{
{
	struct mlx5_vf_migration_file *migf;
	struct mlx5_vf_migration_file *migf;
	int ret;


	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
	if (!migf)
	if (!migf)
@@ -360,20 +369,30 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
	migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf,
	migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf,
					O_WRONLY);
					O_WRONLY);
	if (IS_ERR(migf->filp)) {
	if (IS_ERR(migf->filp)) {
		int err = PTR_ERR(migf->filp);
		ret = PTR_ERR(migf->filp);

		goto end;
		kfree(migf);
		return ERR_PTR(err);
	}
	}

	migf->mvdev = mvdev;
	ret = mlx5vf_cmd_alloc_pd(migf);
	if (ret)
		goto out_free;

	stream_open(migf->filp->f_inode, migf->filp);
	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	mutex_init(&migf->lock);
	return migf;
	return migf;
out_free:
	fput(migf->filp);
end:
	kfree(migf);
	return ERR_PTR(ret);
}
}


void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
{
{
	if (mvdev->resuming_migf) {
	if (mvdev->resuming_migf) {
		mlx5vf_disable_fd(mvdev->resuming_migf);
		mlx5vf_disable_fd(mvdev->resuming_migf);
		mlx5fv_cmd_clean_migf_resources(mvdev->resuming_migf);
		fput(mvdev->resuming_migf->filp);
		fput(mvdev->resuming_migf->filp);
		mvdev->resuming_migf = NULL;
		mvdev->resuming_migf = NULL;
	}
	}
@@ -381,6 +400,7 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
		mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
		mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
		cancel_work_sync(&mvdev->saving_migf->async_data.work);
		cancel_work_sync(&mvdev->saving_migf->async_data.work);
		mlx5vf_disable_fd(mvdev->saving_migf);
		mlx5vf_disable_fd(mvdev->saving_migf);
		mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf);
		fput(mvdev->saving_migf->filp);
		fput(mvdev->saving_migf->filp);
		mvdev->saving_migf = NULL;
		mvdev->saving_migf = NULL;
	}
	}