Commit bb500dbe authored by Brett Creeley's avatar Brett Creeley Committed by Alex Williamson
Browse files

vfio/pds: Add VFIO live migration support



Add live migration support via the VFIO subsystem. The migration
implementation aligns with the definition from uapi/vfio.h and uses
the pds_core PF's adminq for device configuration.

The ability to suspend, resume, and transfer VF device state data is
included along with the required admin queue command structures and
implementations.

PDS_LM_CMD_SUSPEND and PDS_LM_CMD_SUSPEND_STATUS are added to support
the VF device suspend operation.

PDS_LM_CMD_RESUME is added to support the VF device resume operation.

PDS_LM_CMD_STATE_SIZE is added to determine the exact size of the VF
device state data.

PDS_LM_CMD_SAVE is added to get the VF device state data.

PDS_LM_CMD_RESTORE is added to restore the VF device with the
previously saved data from PDS_LM_CMD_SAVE.

PDS_LM_CMD_HOST_VF_STATUS is added to notify the DSC/firmware when
a migration is in/not-in progress from the host's perspective. The
DSC/firmware can use this to clear/setup any necessary state related
to a migration.

Signed-off-by: default avatarBrett Creeley <brett.creeley@amd.com>
Signed-off-by: default avatarShannon Nelson <shannon.nelson@amd.com>
Reviewed-by: default avatarSimon Horman <horms@kernel.org>
Reviewed-by: default avatarKevin Tian <kevin.tian@intel.com>
Reviewed-by: default avatarShameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: default avatarJason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20230807205755.29579-6-brett.creeley@amd.com


Signed-off-by: default avatarAlex Williamson <alex.williamson@redhat.com>
parent 63f77a71
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -5,5 +5,6 @@ obj-$(CONFIG_PDS_VFIO_PCI) += pds-vfio-pci.o

pds-vfio-pci-y := \
	cmds.o		\
	lm.o		\
	pci_drv.o	\
	vfio_dev.o
+330 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@

#include <linux/io.h>
#include <linux/types.h>
#include <linux/delay.h>

#include <linux/pds/pds_common.h>
#include <linux/pds/pds_core_if.h>
@@ -11,6 +12,37 @@
#include "vfio_dev.h"
#include "cmds.h"

#define SUSPEND_TIMEOUT_S		5
#define SUSPEND_CHECK_INTERVAL_MS	1

static int pds_vfio_client_adminq_cmd(struct pds_vfio_pci_device *pds_vfio,
				      union pds_core_adminq_cmd *req,
				      union pds_core_adminq_comp *resp,
				      bool fast_poll)
{
	struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
	union pds_core_adminq_cmd cmd = {};
	struct pdsc *pdsc;
	int err;

	/* Wrap the client request */
	cmd.client_request.opcode = PDS_AQ_CMD_CLIENT_CMD;
	cmd.client_request.client_id = cpu_to_le16(pds_vfio->client_id);
	memcpy(cmd.client_request.client_cmd, req,
	       sizeof(cmd.client_request.client_cmd));

	pdsc = pdsc_get_pf_struct(pdev);
	if (IS_ERR(pdsc))
		return PTR_ERR(pdsc);

	err = pdsc_adminq_post(pdsc, &cmd, resp, fast_poll);
	if (err && err != -EAGAIN)
		dev_err(pds_vfio_to_dev(pds_vfio),
			"client admin cmd failed: %pe\n", ERR_PTR(err));

	return err;
}

int pds_vfio_register_client_cmd(struct pds_vfio_pci_device *pds_vfio)
{
	struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
@@ -52,3 +84,301 @@ void pds_vfio_unregister_client_cmd(struct pds_vfio_pci_device *pds_vfio)

	pds_vfio->client_id = 0;
}

static int
pds_vfio_suspend_wait_device_cmd(struct pds_vfio_pci_device *pds_vfio)
{
	union pds_core_adminq_cmd cmd = {
		.lm_suspend_status = {
			.opcode = PDS_LM_CMD_SUSPEND_STATUS,
			.vf_id = cpu_to_le16(pds_vfio->vf_id),
		},
	};
	struct device *dev = pds_vfio_to_dev(pds_vfio);
	union pds_core_adminq_comp comp = {};
	unsigned long time_limit;
	unsigned long time_start;
	unsigned long time_done;
	int err;

	time_start = jiffies;
	time_limit = time_start + HZ * SUSPEND_TIMEOUT_S;
	do {
		err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, true);
		if (err != -EAGAIN)
			break;

		msleep(SUSPEND_CHECK_INTERVAL_MS);
	} while (time_before(jiffies, time_limit));

	time_done = jiffies;
	dev_dbg(dev, "%s: vf%u: Suspend comp received in %d msecs\n", __func__,
		pds_vfio->vf_id, jiffies_to_msecs(time_done - time_start));

	/* Check the results */
	if (time_after_eq(time_done, time_limit)) {
		dev_err(dev, "%s: vf%u: Suspend comp timeout\n", __func__,
			pds_vfio->vf_id);
		err = -ETIMEDOUT;
	}

	return err;
}

int pds_vfio_suspend_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type)
{
	union pds_core_adminq_cmd cmd = {
		.lm_suspend = {
			.opcode = PDS_LM_CMD_SUSPEND,
			.vf_id = cpu_to_le16(pds_vfio->vf_id),
			.type = type,
		},
	};
	struct device *dev = pds_vfio_to_dev(pds_vfio);
	union pds_core_adminq_comp comp = {};
	int err;

	dev_dbg(dev, "vf%u: Suspend device\n", pds_vfio->vf_id);

	/*
	 * The initial suspend request to the firmware starts the device suspend
	 * operation and the firmware returns success if it's started
	 * successfully.
	 */
	err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, true);
	if (err) {
		dev_err(dev, "vf%u: Suspend failed: %pe\n", pds_vfio->vf_id,
			ERR_PTR(err));
		return err;
	}

	/*
	 * The subsequent suspend status request(s) check if the firmware has
	 * completed the device suspend process.
	 */
	return pds_vfio_suspend_wait_device_cmd(pds_vfio);
}

int pds_vfio_resume_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type)
{
	union pds_core_adminq_cmd cmd = {
		.lm_resume = {
			.opcode = PDS_LM_CMD_RESUME,
			.vf_id = cpu_to_le16(pds_vfio->vf_id),
			.type = type,
		},
	};
	struct device *dev = pds_vfio_to_dev(pds_vfio);
	union pds_core_adminq_comp comp = {};

	dev_dbg(dev, "vf%u: Resume device\n", pds_vfio->vf_id);

	return pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, true);
}

int pds_vfio_get_lm_state_size_cmd(struct pds_vfio_pci_device *pds_vfio, u64 *size)
{
	union pds_core_adminq_cmd cmd = {
		.lm_state_size = {
			.opcode = PDS_LM_CMD_STATE_SIZE,
			.vf_id = cpu_to_le16(pds_vfio->vf_id),
		},
	};
	struct device *dev = pds_vfio_to_dev(pds_vfio);
	union pds_core_adminq_comp comp = {};
	int err;

	dev_dbg(dev, "vf%u: Get migration status\n", pds_vfio->vf_id);

	err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
	if (err)
		return err;

	*size = le64_to_cpu(comp.lm_state_size.size);
	return 0;
}

static int pds_vfio_dma_map_lm_file(struct device *dev,
				    enum dma_data_direction dir,
				    struct pds_vfio_lm_file *lm_file)
{
	struct pds_lm_sg_elem *sgl, *sge;
	struct scatterlist *sg;
	dma_addr_t sgl_addr;
	size_t sgl_size;
	int err;
	int i;

	if (!lm_file)
		return -EINVAL;

	/* dma map file pages */
	err = dma_map_sgtable(dev, &lm_file->sg_table, dir, 0);
	if (err)
		return err;

	lm_file->num_sge = lm_file->sg_table.nents;

	/* alloc sgl */
	sgl_size = lm_file->num_sge * sizeof(struct pds_lm_sg_elem);
	sgl = kzalloc(sgl_size, GFP_KERNEL);
	if (!sgl) {
		err = -ENOMEM;
		goto out_unmap_sgtable;
	}

	/* fill sgl */
	sge = sgl;
	for_each_sgtable_dma_sg(&lm_file->sg_table, sg, i) {
		sge->addr = cpu_to_le64(sg_dma_address(sg));
		sge->len = cpu_to_le32(sg_dma_len(sg));
		dev_dbg(dev, "addr = %llx, len = %u\n", sge->addr, sge->len);
		sge++;
	}

	sgl_addr = dma_map_single(dev, sgl, sgl_size, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, sgl_addr)) {
		err = -EIO;
		goto out_free_sgl;
	}

	lm_file->sgl = sgl;
	lm_file->sgl_addr = sgl_addr;

	return 0;

out_free_sgl:
	kfree(sgl);
out_unmap_sgtable:
	lm_file->num_sge = 0;
	dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0);
	return err;
}

static void pds_vfio_dma_unmap_lm_file(struct device *dev,
				       enum dma_data_direction dir,
				       struct pds_vfio_lm_file *lm_file)
{
	if (!lm_file)
		return;

	/* free sgl */
	if (lm_file->sgl) {
		dma_unmap_single(dev, lm_file->sgl_addr,
				 lm_file->num_sge * sizeof(*lm_file->sgl),
				 DMA_TO_DEVICE);
		kfree(lm_file->sgl);
		lm_file->sgl = NULL;
		lm_file->sgl_addr = DMA_MAPPING_ERROR;
		lm_file->num_sge = 0;
	}

	/* dma unmap file pages */
	dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0);
}

int pds_vfio_get_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio)
{
	union pds_core_adminq_cmd cmd = {
		.lm_save = {
			.opcode = PDS_LM_CMD_SAVE,
			.vf_id = cpu_to_le16(pds_vfio->vf_id),
		},
	};
	struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	union pds_core_adminq_comp comp = {};
	struct pds_vfio_lm_file *lm_file;
	int err;

	dev_dbg(&pdev->dev, "vf%u: Get migration state\n", pds_vfio->vf_id);

	lm_file = pds_vfio->save_file;

	err = pds_vfio_dma_map_lm_file(pdsc_dev, DMA_FROM_DEVICE, lm_file);
	if (err) {
		dev_err(&pdev->dev, "failed to map save migration file: %pe\n",
			ERR_PTR(err));
		return err;
	}

	cmd.lm_save.sgl_addr = cpu_to_le64(lm_file->sgl_addr);
	cmd.lm_save.num_sge = cpu_to_le32(lm_file->num_sge);

	err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
	if (err)
		dev_err(&pdev->dev, "failed to get migration state: %pe\n",
			ERR_PTR(err));

	pds_vfio_dma_unmap_lm_file(pdsc_dev, DMA_FROM_DEVICE, lm_file);

	return err;
}

int pds_vfio_set_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio)
{
	union pds_core_adminq_cmd cmd = {
		.lm_restore = {
			.opcode = PDS_LM_CMD_RESTORE,
			.vf_id = cpu_to_le16(pds_vfio->vf_id),
		},
	};
	struct pci_dev *pdev = pds_vfio_to_pci_dev(pds_vfio);
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	union pds_core_adminq_comp comp = {};
	struct pds_vfio_lm_file *lm_file;
	int err;

	dev_dbg(&pdev->dev, "vf%u: Set migration state\n", pds_vfio->vf_id);

	lm_file = pds_vfio->restore_file;

	err = pds_vfio_dma_map_lm_file(pdsc_dev, DMA_TO_DEVICE, lm_file);
	if (err) {
		dev_err(&pdev->dev,
			"failed to map restore migration file: %pe\n",
			ERR_PTR(err));
		return err;
	}

	cmd.lm_restore.sgl_addr = cpu_to_le64(lm_file->sgl_addr);
	cmd.lm_restore.num_sge = cpu_to_le32(lm_file->num_sge);

	err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
	if (err)
		dev_err(&pdev->dev, "failed to set migration state: %pe\n",
			ERR_PTR(err));

	pds_vfio_dma_unmap_lm_file(pdsc_dev, DMA_TO_DEVICE, lm_file);

	return err;
}

void pds_vfio_send_host_vf_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio,
					 enum pds_lm_host_vf_status vf_status)
{
	union pds_core_adminq_cmd cmd = {
		.lm_host_vf_status = {
			.opcode = PDS_LM_CMD_HOST_VF_STATUS,
			.vf_id = cpu_to_le16(pds_vfio->vf_id),
			.status = vf_status,
		},
	};
	struct device *dev = pds_vfio_to_dev(pds_vfio);
	union pds_core_adminq_comp comp = {};
	int err;

	dev_dbg(dev, "vf%u: Set host VF LM status: %u", pds_vfio->vf_id,
		vf_status);
	if (vf_status != PDS_LM_STA_IN_PROGRESS &&
	    vf_status != PDS_LM_STA_NONE) {
		dev_warn(dev, "Invalid host VF migration status, %d\n",
			 vf_status);
		return;
	}

	err = pds_vfio_client_adminq_cmd(pds_vfio, &cmd, &comp, false);
	if (err)
		dev_warn(dev, "failed to send host VF migration status: %pe\n",
			 ERR_PTR(err));
}
+7 −1
Original line number Diff line number Diff line
@@ -6,5 +6,11 @@

int pds_vfio_register_client_cmd(struct pds_vfio_pci_device *pds_vfio);
void pds_vfio_unregister_client_cmd(struct pds_vfio_pci_device *pds_vfio);

int pds_vfio_suspend_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type);
int pds_vfio_resume_device_cmd(struct pds_vfio_pci_device *pds_vfio, u8 type);
int pds_vfio_get_lm_state_size_cmd(struct pds_vfio_pci_device *pds_vfio, u64 *size);
int pds_vfio_get_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio);
int pds_vfio_set_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio);
void pds_vfio_send_host_vf_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio,
					 enum pds_lm_host_vf_status vf_status);
#endif /* _CMDS_H_ */
+434 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2023 Advanced Micro Devices, Inc. */

#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/vfio.h>
#include <linux/vfio_pci_core.h>

#include "vfio_dev.h"
#include "cmds.h"

static struct pds_vfio_lm_file *
pds_vfio_get_lm_file(const struct file_operations *fops, int flags, u64 size)
{
	struct pds_vfio_lm_file *lm_file = NULL;
	unsigned long long npages;
	struct page **pages;
	void *page_mem;
	const void *p;

	if (!size)
		return NULL;

	/* Alloc file structure */
	lm_file = kzalloc(sizeof(*lm_file), GFP_KERNEL);
	if (!lm_file)
		return NULL;

	/* Create file */
	lm_file->filep =
		anon_inode_getfile("pds_vfio_lm", fops, lm_file, flags);
	if (!lm_file->filep)
		goto out_free_file;

	stream_open(lm_file->filep->f_inode, lm_file->filep);
	mutex_init(&lm_file->lock);

	/* prevent file from being released before we are done with it */
	get_file(lm_file->filep);

	/* Allocate memory for file pages */
	npages = DIV_ROUND_UP_ULL(size, PAGE_SIZE);
	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		goto out_put_file;

	page_mem = kvzalloc(ALIGN(size, PAGE_SIZE), GFP_KERNEL);
	if (!page_mem)
		goto out_free_pages_array;

	p = page_mem - offset_in_page(page_mem);
	for (unsigned long long i = 0; i < npages; i++) {
		if (is_vmalloc_addr(p))
			pages[i] = vmalloc_to_page(p);
		else
			pages[i] = kmap_to_page((void *)p);
		if (!pages[i])
			goto out_free_page_mem;

		p += PAGE_SIZE;
	}

	/* Create scatterlist of file pages to use for DMA mapping later */
	if (sg_alloc_table_from_pages(&lm_file->sg_table, pages, npages, 0,
				      size, GFP_KERNEL))
		goto out_free_page_mem;

	lm_file->size = size;
	lm_file->pages = pages;
	lm_file->npages = npages;
	lm_file->page_mem = page_mem;
	lm_file->alloc_size = npages * PAGE_SIZE;

	return lm_file;

out_free_page_mem:
	kvfree(page_mem);
out_free_pages_array:
	kfree(pages);
out_put_file:
	fput(lm_file->filep);
	mutex_destroy(&lm_file->lock);
out_free_file:
	kfree(lm_file);

	return NULL;
}

static void pds_vfio_put_lm_file(struct pds_vfio_lm_file *lm_file)
{
	mutex_lock(&lm_file->lock);

	lm_file->size = 0;
	lm_file->alloc_size = 0;

	/* Free scatter list of file pages */
	sg_free_table(&lm_file->sg_table);

	kvfree(lm_file->page_mem);
	lm_file->page_mem = NULL;
	kfree(lm_file->pages);
	lm_file->pages = NULL;

	mutex_unlock(&lm_file->lock);

	/* allow file to be released since we are done with it */
	fput(lm_file->filep);
}

void pds_vfio_put_save_file(struct pds_vfio_pci_device *pds_vfio)
{
	if (!pds_vfio->save_file)
		return;

	pds_vfio_put_lm_file(pds_vfio->save_file);
	pds_vfio->save_file = NULL;
}

void pds_vfio_put_restore_file(struct pds_vfio_pci_device *pds_vfio)
{
	if (!pds_vfio->restore_file)
		return;

	pds_vfio_put_lm_file(pds_vfio->restore_file);
	pds_vfio->restore_file = NULL;
}

static struct page *pds_vfio_get_file_page(struct pds_vfio_lm_file *lm_file,
					   unsigned long offset)
{
	unsigned long cur_offset = 0;
	struct scatterlist *sg;
	unsigned int i;

	/* All accesses are sequential */
	if (offset < lm_file->last_offset || !lm_file->last_offset_sg) {
		lm_file->last_offset = 0;
		lm_file->last_offset_sg = lm_file->sg_table.sgl;
		lm_file->sg_last_entry = 0;
	}

	cur_offset = lm_file->last_offset;

	for_each_sg(lm_file->last_offset_sg, sg,
		    lm_file->sg_table.orig_nents - lm_file->sg_last_entry, i) {
		if (offset < sg->length + cur_offset) {
			lm_file->last_offset_sg = sg;
			lm_file->sg_last_entry += i;
			lm_file->last_offset = cur_offset;
			return nth_page(sg_page(sg),
					(offset - cur_offset) / PAGE_SIZE);
		}
		cur_offset += sg->length;
	}

	return NULL;
}

static int pds_vfio_release_file(struct inode *inode, struct file *filp)
{
	struct pds_vfio_lm_file *lm_file = filp->private_data;

	mutex_lock(&lm_file->lock);
	lm_file->filep->f_pos = 0;
	lm_file->size = 0;
	mutex_unlock(&lm_file->lock);
	mutex_destroy(&lm_file->lock);
	kfree(lm_file);

	return 0;
}

static ssize_t pds_vfio_save_read(struct file *filp, char __user *buf,
				  size_t len, loff_t *pos)
{
	struct pds_vfio_lm_file *lm_file = filp->private_data;
	ssize_t done = 0;

	if (pos)
		return -ESPIPE;
	pos = &filp->f_pos;

	mutex_lock(&lm_file->lock);
	if (*pos > lm_file->size) {
		done = -EINVAL;
		goto out_unlock;
	}

	len = min_t(size_t, lm_file->size - *pos, len);
	while (len) {
		size_t page_offset;
		struct page *page;
		size_t page_len;
		u8 *from_buff;
		int err;

		page_offset = (*pos) % PAGE_SIZE;
		page = pds_vfio_get_file_page(lm_file, *pos - page_offset);
		if (!page) {
			if (done == 0)
				done = -EINVAL;
			goto out_unlock;
		}

		page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
		from_buff = kmap_local_page(page);
		err = copy_to_user(buf, from_buff + page_offset, page_len);
		kunmap_local(from_buff);
		if (err) {
			done = -EFAULT;
			goto out_unlock;
		}
		*pos += page_len;
		len -= page_len;
		done += page_len;
		buf += page_len;
	}

out_unlock:
	mutex_unlock(&lm_file->lock);
	return done;
}

static const struct file_operations pds_vfio_save_fops = {
	.owner = THIS_MODULE,
	.read = pds_vfio_save_read,
	.release = pds_vfio_release_file,
	.llseek = no_llseek,
};

static int pds_vfio_get_save_file(struct pds_vfio_pci_device *pds_vfio)
{
	struct device *dev = &pds_vfio->vfio_coredev.pdev->dev;
	struct pds_vfio_lm_file *lm_file;
	u64 size;
	int err;

	/* Get live migration state size in this state */
	err = pds_vfio_get_lm_state_size_cmd(pds_vfio, &size);
	if (err) {
		dev_err(dev, "failed to get save status: %pe\n", ERR_PTR(err));
		return err;
	}

	dev_dbg(dev, "save status, size = %lld\n", size);

	if (!size) {
		dev_err(dev, "invalid state size\n");
		return -EIO;
	}

	lm_file = pds_vfio_get_lm_file(&pds_vfio_save_fops, O_RDONLY, size);
	if (!lm_file) {
		dev_err(dev, "failed to create save file\n");
		return -ENOENT;
	}

	dev_dbg(dev, "size = %lld, alloc_size = %lld, npages = %lld\n",
		lm_file->size, lm_file->alloc_size, lm_file->npages);

	pds_vfio->save_file = lm_file;

	return 0;
}

static ssize_t pds_vfio_restore_write(struct file *filp, const char __user *buf,
				      size_t len, loff_t *pos)
{
	struct pds_vfio_lm_file *lm_file = filp->private_data;
	loff_t requested_length;
	ssize_t done = 0;

	if (pos)
		return -ESPIPE;

	pos = &filp->f_pos;

	if (*pos < 0 ||
	    check_add_overflow((loff_t)len, *pos, &requested_length))
		return -EINVAL;

	mutex_lock(&lm_file->lock);

	while (len) {
		size_t page_offset;
		struct page *page;
		size_t page_len;
		u8 *to_buff;
		int err;

		page_offset = (*pos) % PAGE_SIZE;
		page = pds_vfio_get_file_page(lm_file, *pos - page_offset);
		if (!page) {
			if (done == 0)
				done = -EINVAL;
			goto out_unlock;
		}

		page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
		to_buff = kmap_local_page(page);
		err = copy_from_user(to_buff + page_offset, buf, page_len);
		kunmap_local(to_buff);
		if (err) {
			done = -EFAULT;
			goto out_unlock;
		}
		*pos += page_len;
		len -= page_len;
		done += page_len;
		buf += page_len;
		lm_file->size += page_len;
	}
out_unlock:
	mutex_unlock(&lm_file->lock);
	return done;
}

static const struct file_operations pds_vfio_restore_fops = {
	.owner = THIS_MODULE,
	.write = pds_vfio_restore_write,
	.release = pds_vfio_release_file,
	.llseek = no_llseek,
};

static int pds_vfio_get_restore_file(struct pds_vfio_pci_device *pds_vfio)
{
	struct device *dev = &pds_vfio->vfio_coredev.pdev->dev;
	struct pds_vfio_lm_file *lm_file;
	u64 size;

	size = sizeof(union pds_lm_dev_state);
	dev_dbg(dev, "restore status, size = %lld\n", size);

	if (!size) {
		dev_err(dev, "invalid state size");
		return -EIO;
	}

	lm_file = pds_vfio_get_lm_file(&pds_vfio_restore_fops, O_WRONLY, size);
	if (!lm_file) {
		dev_err(dev, "failed to create restore file");
		return -ENOENT;
	}
	pds_vfio->restore_file = lm_file;

	return 0;
}

struct file *
pds_vfio_step_device_state_locked(struct pds_vfio_pci_device *pds_vfio,
				  enum vfio_device_mig_state next)
{
	enum vfio_device_mig_state cur = pds_vfio->state;
	int err;

	if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_STOP_COPY) {
		err = pds_vfio_get_save_file(pds_vfio);
		if (err)
			return ERR_PTR(err);

		err = pds_vfio_get_lm_state_cmd(pds_vfio);
		if (err) {
			pds_vfio_put_save_file(pds_vfio);
			return ERR_PTR(err);
		}

		return pds_vfio->save_file->filep;
	}

	if (cur == VFIO_DEVICE_STATE_STOP_COPY && next == VFIO_DEVICE_STATE_STOP) {
		pds_vfio_put_save_file(pds_vfio);
		pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_NONE);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RESUMING) {
		err = pds_vfio_get_restore_file(pds_vfio);
		if (err)
			return ERR_PTR(err);

		return pds_vfio->restore_file->filep;
	}

	if (cur == VFIO_DEVICE_STATE_RESUMING && next == VFIO_DEVICE_STATE_STOP) {
		err = pds_vfio_set_lm_state_cmd(pds_vfio);
		if (err)
			return ERR_PTR(err);

		pds_vfio_put_restore_file(pds_vfio);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_RUNNING && next == VFIO_DEVICE_STATE_RUNNING_P2P) {
		pds_vfio_send_host_vf_lm_status_cmd(pds_vfio,
						    PDS_LM_STA_IN_PROGRESS);
		err = pds_vfio_suspend_device_cmd(pds_vfio,
						  PDS_LM_SUSPEND_RESUME_TYPE_P2P);
		if (err)
			return ERR_PTR(err);

		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_RUNNING) {
		err = pds_vfio_resume_device_cmd(pds_vfio,
						 PDS_LM_SUSPEND_RESUME_TYPE_FULL);
		if (err)
			return ERR_PTR(err);

		pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_NONE);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RUNNING_P2P) {
		err = pds_vfio_resume_device_cmd(pds_vfio,
						 PDS_LM_SUSPEND_RESUME_TYPE_P2P);
		if (err)
			return ERR_PTR(err);

		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_STOP) {
		err = pds_vfio_suspend_device_cmd(pds_vfio,
						  PDS_LM_SUSPEND_RESUME_TYPE_FULL);
		if (err)
			return ERR_PTR(err);
		return NULL;
	}

	return ERR_PTR(-EINVAL);
}
+41 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright(c) 2023 Advanced Micro Devices, Inc. */

#ifndef _LM_H_
#define _LM_H_

#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/types.h>

#include <linux/pds/pds_common.h>
#include <linux/pds/pds_adminq.h>

struct pds_vfio_lm_file {
	struct file *filep;
	struct mutex lock;	/* protect live migration data file */
	u64 size;		/* Size with valid data */
	u64 alloc_size;		/* Total allocated size. Always >= len */
	void *page_mem;		/* memory allocated for pages */
	struct page **pages;	/* Backing pages for file */
	unsigned long long npages;
	struct sg_table sg_table;	/* SG table for backing pages */
	struct pds_lm_sg_elem *sgl;	/* DMA mapping */
	dma_addr_t sgl_addr;
	u16 num_sge;
	struct scatterlist *last_offset_sg;	/* Iterator */
	unsigned int sg_last_entry;
	unsigned long last_offset;
};

struct pds_vfio_pci_device;

struct file *
pds_vfio_step_device_state_locked(struct pds_vfio_pci_device *pds_vfio,
				  enum vfio_device_mig_state next);

void pds_vfio_put_save_file(struct pds_vfio_pci_device *pds_vfio);
void pds_vfio_put_restore_file(struct pds_vfio_pci_device *pds_vfio);

#endif /* _LM_H_ */
Loading