Commit 4c2e0f76 authored by Alex Sierra, committed by Andrew Morton
Browse files

lib: add support for device coherent type in test_hmm

Device Coherent type uses device memory that is coherently accessible by
the CPU.  This could be shown as SP (special purpose) memory range at the
BIOS-e820 memory enumeration.  If no SP memory is supported in system,
this could be faked by setting CONFIG_EFI_FAKE_MEMMAP.

Currently, test_hmm only supports two different SP ranges of at least
256MB size.  These can be specified with the efi_fake_mem kernel
parameter.  For example, two SP ranges of 1GB starting at physical
addresses 0x100000000 and 0x140000000:
efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000

Private and coherent device mirror instances can be created in the same
probe.  This is done by passing the module parameters spm_addr_dev0 &
spm_addr_dev1.  In this case, it will create four instances of
device_mirror.  The first two correspond to private device type, the last
two to coherent type.  Then, they can be easily accessed from user space
through /dev/hmm_mirror<num_device>.  Usually num_device 0 and 1 are for
private, and 2 and 3 for coherent types.  If no module parameters are
passed, two instances of private type device_mirror will be created only.

Link: https://lkml.kernel.org/r/20220715150521.18165-11-alex.sierra@amd.com


Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 25b80162
Loading
Loading
Loading
Loading
+192 −61
Original line number Original line Diff line number Diff line
@@ -32,11 +32,22 @@


#include "test_hmm_uapi.h"
#include "test_hmm_uapi.h"


#define DMIRROR_NDEVICES		2
#define DMIRROR_NDEVICES		4
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE		16
#define DEVMEM_CHUNKS_RESERVE		16


/*
 * For device_private pages, dpage is just a dummy struct page
 * representing a piece of device memory. dmirror_devmem_alloc_page
 * allocates a real system memory page as backing storage to fake a
 * real device. zone_device_data points to that backing page. But
 * for device_coherent memory, the struct page represents real
 * physical CPU-accessible memory that we can use directly.
 *
 * BACKING_PAGE() therefore yields page->zone_device_data when @page is a
 * device_private page and @page itself otherwise. The argument appears
 * more than once in the expansion, so it must not have side effects.
 */
#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
			   (page)->zone_device_data : (page))

static unsigned long spm_addr_dev0;
static unsigned long spm_addr_dev0;
module_param(spm_addr_dev0, long, 0644);
module_param(spm_addr_dev0, long, 0644);
MODULE_PARM_DESC(spm_addr_dev0,
MODULE_PARM_DESC(spm_addr_dev0,
@@ -125,6 +136,21 @@ static int dmirror_bounce_init(struct dmirror_bounce *bounce,
	return 0;
	return 0;
}
}


/*
 * Report whether @mdevice is configured as a device-private mirror, i.e.
 * whether its zone_device_type is HMM_DMIRROR_MEMORY_DEVICE_PRIVATE.
 */
static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
{
	return mdevice->zone_device_type == HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
}

/*
 * Pick the migrate_vma source-selection flag matching the memory type of the
 * device behind @dmirror: a device-private mirror selects device private
 * pages, any other type selects device coherent pages.
 */
static enum migrate_vma_direction
dmirror_select_device(struct dmirror *dmirror)
{
	if (dmirror->mdevice->zone_device_type ==
	    HMM_DMIRROR_MEMORY_DEVICE_PRIVATE)
		return MIGRATE_VMA_SELECT_DEVICE_PRIVATE;

	return MIGRATE_VMA_SELECT_DEVICE_COHERENT;
}

static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
{
	vfree(bounce->ptr);
	vfree(bounce->ptr);
@@ -575,16 +601,19 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
{
	struct page *dpage = NULL;
	struct page *dpage = NULL;
	struct page *rpage;
	struct page *rpage = NULL;


	/*
	/*
	 * This is a fake device so we alloc real system memory to store
	 * For ZONE_DEVICE private type, this is a fake device so we allocate
	 * our device memory.
	 * real system memory to store our device memory.
	 * For ZONE_DEVICE coherent type we use the actual dpage to store the
	 * data and ignore rpage.
	 */
	 */
	if (dmirror_is_private_zone(mdevice)) {
		rpage = alloc_page(GFP_HIGHUSER);
		rpage = alloc_page(GFP_HIGHUSER);
		if (!rpage)
		if (!rpage)
			return NULL;
			return NULL;

	}
	spin_lock(&mdevice->lock);
	spin_lock(&mdevice->lock);


	if (mdevice->free_pages) {
	if (mdevice->free_pages) {
@@ -603,6 +632,7 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
	return dpage;
	return dpage;


error:
error:
	if (rpage)
		__free_page(rpage);
		__free_page(rpage);
	return NULL;
	return NULL;
}
}
@@ -629,12 +659,16 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
		 * unallocated pte_none() or read-only zero page.
		 * unallocated pte_none() or read-only zero page.
		 */
		 */
		spage = migrate_pfn_to_page(*src);
		spage = migrate_pfn_to_page(*src);
		if (WARN(spage && is_zone_device_page(spage),
		     "page already in device spage pfn: 0x%lx\n",
		     page_to_pfn(spage)))
			continue;


		dpage = dmirror_devmem_alloc_page(mdevice);
		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
		if (!dpage)
			continue;
			continue;


		rpage = dpage->zone_device_data;
		rpage = BACKING_PAGE(dpage);
		if (spage)
		if (spage)
			copy_highpage(rpage, spage);
			copy_highpage(rpage, spage);
		else
		else
@@ -648,6 +682,8 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
		 */
		 */
		rpage->zone_device_data = dmirror;
		rpage->zone_device_data = dmirror;


		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));
		*dst = migrate_pfn(page_to_pfn(dpage));
		*dst = migrate_pfn(page_to_pfn(dpage));
		if ((*src & MIGRATE_PFN_WRITE) ||
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
		    (!spage && args->vma->vm_flags & VM_WRITE))
@@ -725,11 +761,7 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
		if (!dpage)
		if (!dpage)
			continue;
			continue;


		/*
		entry = BACKING_PAGE(dpage);
		 * Store the page that holds the data so the page table
		 * doesn't have to deal with ZONE_DEVICE private pages.
		 */
		entry = dpage->zone_device_data;
		if (*dst & MIGRATE_PFN_WRITE)
		if (*dst & MIGRATE_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
@@ -815,15 +847,126 @@ static int dmirror_exclusive(struct dmirror *dmirror,
	return ret;
	return ret;
}
}


static int dmirror_migrate(struct dmirror *dmirror,
/*
 * For every page in [args->start, args->end) whose source entry is a device
 * page selected for migration, allocate a system memory page, copy the data
 * held by the device page into it and publish it in args->dst.
 *
 * Entries that are skipped (no source page, not marked for migration, not a
 * device page, or allocation failure) leave the matching args->dst slot
 * untouched. Always returns 0.
 */
static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	/* src and dst advance in lock-step with addr, one slot per page. */
	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		/* Nothing to do unless there is a page flagged for migration. */
		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;

		/* Only device private or device coherent pages are expected here. */
		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		/*
		 * From here on spage is the page that actually holds the data:
		 * zone_device_data for a private page, the page itself otherwise.
		 */
		spage = BACKING_PAGE(spage);
		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;
		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));

		/*
		 * NOTE(review): presumably the destination page must be locked
		 * before it is handed to migrate_vma_pages() - confirm.
		 */
		lock_page(dpage);
		/* Drop this address from the mirror's page table (dmirror->pt). */
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		/* Carry the write permission over from the source entry. */
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}

/*
 * Count the entries of @migrate->src (migrate->npages of them) that have
 * both MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE set.
 */
static unsigned long
dmirror_successful_migrated_pages(struct migrate_vma *migrate)
{
	unsigned long idx = 0;
	unsigned long count = 0;

	while (idx < migrate->npages) {
		unsigned long entry = migrate->src[idx++];

		if (!(entry & MIGRATE_PFN_VALID))
			continue;
		if (!(entry & MIGRATE_PFN_MIGRATE))
			continue;
		count++;
	}

	return count;
}

static int dmirror_migrate_to_system(struct dmirror *dmirror,
				     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64] = { 0 };
	unsigned long dst_pfns[64] = { 0 };
	struct migrate_vma args;
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	cmd->cpages = 0;
	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = dmirror_select_device(dmirror);

		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		pr_debug("Migrating from device mem to sys mem\n");
		dmirror_devmem_fault_alloc_and_copy(&args, dmirror);

		migrate_vma_pages(&args);
		cmd->cpages += dmirror_successful_migrated_pages(&args);
		migrate_vma_finalize(&args);
	}
out:
	mmap_read_unlock(mm);
	mmput(mm);

	return ret;
}

static int dmirror_migrate_to_device(struct dmirror *dmirror,
				struct hmm_dmirror_cmd *cmd)
				struct hmm_dmirror_cmd *cmd)
{
{
	unsigned long start, end, addr;
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64];
	unsigned long src_pfns[64] = { 0 };
	unsigned long dst_pfns[64];
	unsigned long dst_pfns[64] = { 0 };
	struct dmirror_bounce bounce;
	struct dmirror_bounce bounce;
	struct migrate_vma args;
	struct migrate_vma args;
	unsigned long next;
	unsigned long next;
@@ -860,6 +1003,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
		if (ret)
		if (ret)
			goto out;
			goto out;


		pr_debug("Migrating from sys mem to device mem\n");
		dmirror_migrate_alloc_and_copy(&args, dmirror);
		dmirror_migrate_alloc_and_copy(&args, dmirror);
		migrate_vma_pages(&args);
		migrate_vma_pages(&args);
		dmirror_migrate_finalize_and_map(&args, dmirror);
		dmirror_migrate_finalize_and_map(&args, dmirror);
@@ -868,7 +1012,10 @@ static int dmirror_migrate(struct dmirror *dmirror,
	mmap_read_unlock(mm);
	mmap_read_unlock(mm);
	mmput(mm);
	mmput(mm);


	/* Return the migrated data for verification. */
	/*
	 * Return the migrated data for verification.
	 * Only for pages in device zone
	 */
	ret = dmirror_bounce_init(&bounce, start, size);
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
	if (ret)
		return ret;
		return ret;
@@ -911,6 +1058,12 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
		else
		else
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
	} else if (is_device_coherent_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
	} else if (is_zero_pfn(page_to_pfn(page)))
	} else if (is_zero_pfn(page_to_pfn(page)))
		*perm = HMM_DMIRROR_PROT_ZERO;
		*perm = HMM_DMIRROR_PROT_ZERO;
	else
	else
@@ -1098,8 +1251,12 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
		ret = dmirror_write(dmirror, &cmd);
		ret = dmirror_write(dmirror, &cmd);
		break;
		break;


	case HMM_DMIRROR_MIGRATE:
	case HMM_DMIRROR_MIGRATE_TO_DEV:
		ret = dmirror_migrate(dmirror, &cmd);
		ret = dmirror_migrate_to_device(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE_TO_SYS:
		ret = dmirror_migrate_to_system(dmirror, &cmd);
		break;
		break;


	case HMM_DMIRROR_EXCLUSIVE:
	case HMM_DMIRROR_EXCLUSIVE:
@@ -1161,14 +1318,13 @@ static const struct file_operations dmirror_fops = {


static void dmirror_devmem_free(struct page *page)
static void dmirror_devmem_free(struct page *page)
{
{
	struct page *rpage = page->zone_device_data;
	struct page *rpage = BACKING_PAGE(page);
	struct dmirror_device *mdevice;
	struct dmirror_device *mdevice;


	if (rpage)
	if (rpage != page)
		__free_page(rpage);
		__free_page(rpage);


	mdevice = dmirror_page_to_device(page);
	mdevice = dmirror_page_to_device(page);

	spin_lock(&mdevice->lock);
	spin_lock(&mdevice->lock);
	mdevice->cfree++;
	mdevice->cfree++;
	page->zone_device_data = mdevice->free_pages;
	page->zone_device_data = mdevice->free_pages;
@@ -1176,43 +1332,11 @@ static void dmirror_devmem_free(struct page *page)
	spin_unlock(&mdevice->lock);
	spin_unlock(&mdevice->lock);
}
}


static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;
		spage = spage->zone_device_data;

		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;

		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}

static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
{
	struct migrate_vma args;
	struct migrate_vma args;
	unsigned long src_pfns;
	unsigned long src_pfns = 0;
	unsigned long dst_pfns;
	unsigned long dst_pfns = 0;
	struct page *rpage;
	struct page *rpage;
	struct dmirror *dmirror;
	struct dmirror *dmirror;
	vm_fault_t ret;
	vm_fault_t ret;
@@ -1232,7 +1356,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
	args.src = &src_pfns;
	args.src = &src_pfns;
	args.dst = &dst_pfns;
	args.dst = &dst_pfns;
	args.pgmap_owner = dmirror->mdevice;
	args.pgmap_owner = dmirror->mdevice;
	args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
	args.flags = dmirror_select_device(dmirror);


	if (migrate_vma_setup(&args))
	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;
		return VM_FAULT_SIGBUS;
@@ -1311,6 +1435,12 @@ static int __init hmm_dmirror_init(void)
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	dmirror_devices[ndevices++].zone_device_type =
	dmirror_devices[ndevices++].zone_device_type =
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	if (spm_addr_dev0 && spm_addr_dev1) {
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
	}
	for (id = 0; id < ndevices; id++) {
	for (id = 0; id < ndevices; id++) {
		ret = dmirror_device_init(dmirror_devices + id, id);
		ret = dmirror_device_init(dmirror_devices + id, id);
		if (ret)
		if (ret)
@@ -1333,6 +1463,7 @@ static void __exit hmm_dmirror_exit(void)
	int id;
	int id;


	for (id = 0; id < DMIRROR_NDEVICES; id++)
	for (id = 0; id < DMIRROR_NDEVICES; id++)
		if (dmirror_devices[id].zone_device_type)
			dmirror_device_remove(dmirror_devices + id);
			dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}
}
+4 −0
Original line number Original line Diff line number Diff line
@@ -50,6 +50,8 @@ struct hmm_dmirror_cmd {
 *					device the ioctl() is made
 *					device the ioctl() is made
 * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
 * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
 *					other device
 *					other device
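 * HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL: Migrated device coherent page on the
 *					device the ioctl() is made
 * HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE: Migrated device coherent page on some
 *					other device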
 */
 */
enum {
enum {
	HMM_DMIRROR_PROT_ERROR			= 0xFF,
	HMM_DMIRROR_PROT_ERROR			= 0xFF,
@@ -61,6 +63,8 @@ enum {
	HMM_DMIRROR_PROT_ZERO			= 0x10,
	HMM_DMIRROR_PROT_ZERO			= 0x10,
	HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL	= 0x20,
	HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL	= 0x20,
	HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE	= 0x30,
	HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE	= 0x30,
	HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL	= 0x40,
	HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE	= 0x50,
};
};


enum {
enum {