Commit bed26e4a authored by wanghaibin's avatar wanghaibin Committed by Dongxu Sun
Browse files

KVM: arm64: Introduce shadow device

virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8K8HP


CVE: NA

--------------------------------

The shadow device implementation establishes relationships between
virtual devices and back-end virtual platform devices.

Signed-off-by: default avatarwanghaibin <wanghaibin.wang@huawei.com>
Signed-off-by: default avatarZenghui Yu <yuzenghui@huawei.com>
Signed-off-by: default avatarKunkun Jiang <jiangkunkun@huawei.com>
Signed-off-by: default avatarDongxu Sun <sundongxu3@huawei.com>
parent e4c0f182
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
	 vgic/vgic-v3.o vgic/vgic-v4.o \
	 vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
	 vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
	 vgic/vgic-its.o vgic/vgic-debug.o
	 vgic/vgic-its.o vgic/shadow_dev.o vgic/vgic-debug.o

kvm-$(CONFIG_KVM_ARM_PMU)  += pmu-emul.o
obj-$(CONFIG_KVM_HISI_VIRT) += hisilicon/
+38 −0
Original line number Diff line number Diff line
@@ -259,6 +259,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_STEAL_TIME:
		r = kvm_arm_pvtime_supported();
		break;
	case KVM_CAP_ARM_VIRT_MSI_BYPASS:
		r = sdev_enable;
		break;
	default:
		r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
		break;
@@ -1444,6 +1447,34 @@ long kvm_arch_vm_ioctl(struct file *filp,

		return 0;
	}
	case KVM_CREATE_SHADOW_DEV: {
		struct kvm_master_dev_info *mdi;
		u32 nvectors;
		int ret;

		if (get_user(nvectors, (const u32 __user *)argp))
			return -EFAULT;
		if (!nvectors)
			return -EINVAL;

		mdi = memdup_user(argp, sizeof(*mdi) + nvectors * sizeof(mdi->msi[0]));
		if (IS_ERR(mdi))
			return PTR_ERR(mdi);

		ret = kvm_shadow_dev_create(kvm, mdi);
		kfree(mdi);

		return ret;
	}
	case KVM_DEL_SHADOW_DEV: {
		u32 devid;

		if (get_user(devid, (const u32 __user *)argp))
			return -EFAULT;

		kvm_shadow_dev_delete(kvm, devid);
		return 0;
	}
	default:
		return -EINVAL;
	}
@@ -1885,6 +1916,11 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
	kvm_arm_resume_guest(irqfd->kvm);
}

/* Called before VM destruction: tear down every shadow device of this VM. */
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
	kvm_shadow_dev_delete_all(kvm);
}

/**
 * Initialize Hyp-mode and memory mappings on all CPUs.
 */
@@ -1950,6 +1986,8 @@ int kvm_arch_init(void *opaque)
	else
		kvm_info("Hyp mode initialized successfully\n");

	kvm_shadow_dev_init();

	return 0;

out_hyp:
+327 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2019-2020 HUAWEI TECHNOLOGIES CO., LTD., All Rights Reserved.
 * Author: Wanghaibin <wanghaibin.wang@huawei.com>
 */

#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/platform_device.h>
#include <linux/uaccess.h>

static struct workqueue_struct *sdev_cleanup_wq;
static bool virt_msi_bypass;
bool sdev_enable;

static void shadow_dev_destroy(struct work_struct *work);
static void sdev_virt_pdev_delete(struct platform_device *pdev);

/*
 * Inject a bypassed MSI by forcing the corresponding host interrupt
 * into the pending state at the irqchip level.
 */
int shadow_dev_virq_bypass_inject(struct kvm *kvm,
				  struct kvm_kernel_irq_routing_entry *e)
{
	struct shadow_dev *sdev = e->cache.data;
	u32 hwirq = sdev->host_irq[e->msi.data];
	int err;

	err = irq_set_irqchip_state(hwirq, IRQCHIP_STATE_PENDING, true);
	WARN_RATELIMIT(err, "IRQ %d", hwirq);

	return err;
}

/*
 * Look up the shadow device matching @msi->devid and check that the
 * requested vector has direct injection enabled.
 *
 * Must be called with the dist->sdev_list_lock held.
 */
struct shadow_dev *kvm_shadow_dev_get(struct kvm *kvm, struct kvm_msi *msi)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct shadow_dev *cur;

	if (!sdev_enable)
		return NULL;

	list_for_each_entry(cur, &dist->sdev_list_head, entry) {
		if (cur->devid != msi->devid)
			continue;

		/*
		 * Device found: bypass is only usable if the vector is in
		 * range and forwarding was successfully set up for it.
		 */
		if (msi->data < cur->nvecs && test_bit(msi->data, cur->enable))
			return cur;

		return NULL;
	}

	return NULL;
}

/*
 * Create and register the back-end virtual platform device that backs a
 * shadow device with @nvec MSI vectors.
 *
 * Returns the new platform device, or an ERR_PTR() on failure.
 */
static struct platform_device *sdev_virt_pdev_add(u32 nvec)
{
	struct platform_device *virtdev;
	int ret = -ENOMEM;

	virtdev = platform_device_alloc("virt_plat_dev", PLATFORM_DEVID_AUTO);
	if (!virtdev) {
		kvm_err("Allocate virtual platform device failed\n");
		goto out;
	}

	/*
	 * NOTE(review): this stores the address of the on-stack parameter
	 * @nvec as drvdata. That pointer is only valid while this function
	 * runs, i.e. if the driver probe triggered by platform_device_add()
	 * below reads it synchronously. If probing can be deferred, the
	 * driver would see a dangling pointer — confirm against the
	 * virt_plat_dev driver implementation.
	 */
	dev_set_drvdata(&virtdev->dev, &nvec);

	ret = platform_device_add(virtdev);
	if (ret) {
		kvm_err("Add virtual platform device failed (%d)\n", ret);
		goto put_device;
	}

	return virtdev;

put_device:
	platform_device_put(virtdev);
out:
	return ERR_PTR(ret);
}

static void sdev_set_irq_entry(struct shadow_dev *sdev,
			       struct kvm_kernel_irq_routing_entry *irq_entries)
{
	int i;

	for (i = 0; i < sdev->nvecs; i++) {
		irq_entries[i].msi.address_lo = sdev->msi[i].address_lo;
		irq_entries[i].msi.address_hi = sdev->msi[i].address_hi;
		irq_entries[i].msi.data = sdev->msi[i].data;
		irq_entries[i].msi.flags = sdev->msi[i].flags;
		irq_entries[i].msi.devid = sdev->msi[i].devid;
	}
}

static int sdev_virq_bypass_active(struct kvm *kvm, struct shadow_dev *sdev)
{
	struct kvm_kernel_irq_routing_entry *irq_entries;
	struct msi_desc *desc;
	u32 vec = 0;

	sdev->host_irq = kcalloc(sdev->nvecs, sizeof(int), GFP_KERNEL);
	sdev->enable   = bitmap_zalloc(sdev->nvecs, GFP_KERNEL);
	irq_entries    = kcalloc(sdev->nvecs,
				 sizeof(struct kvm_kernel_irq_routing_entry),
				 GFP_KERNEL);

	if (!irq_entries || !sdev->enable || !sdev->host_irq) {
		kfree(sdev->host_irq);
		kfree(sdev->enable);
		kfree(irq_entries);
		return -ENOMEM;
	}

	sdev_set_irq_entry(sdev, irq_entries);

	for_each_msi_entry(desc, &sdev->pdev->dev) {
		if (!kvm_vgic_v4_set_forwarding(kvm, desc->irq,
						&irq_entries[vec])) {
			set_bit(vec, sdev->enable);
			sdev->host_irq[vec] = desc->irq;
		} else {
			/*
			 * Can not use shadow device for direct injection,
			 * though not fatal...
			 */
			kvm_err("Shadow device set (%d) forwarding failed",
				desc->irq);
		}
		vec++;
	}

	kfree(irq_entries);
	return 0;
}

static void sdev_msi_entry_init(struct kvm_master_dev_info *mdi,
				struct shadow_dev *sdev)
{
	int i;

	for (i = 0; i < sdev->nvecs; i++) {
		sdev->msi[i].address_lo = mdi->msi[i].address_lo;
		sdev->msi[i].address_hi = mdi->msi[i].address_hi;
		sdev->msi[i].data = mdi->msi[i].data;
		sdev->msi[i].flags = mdi->msi[i].flags;
		sdev->msi[i].devid = mdi->msi[i].devid;
	}
}

/*
 * Create a shadow device from userspace-provided master device info:
 * record the guest MSI layout, instantiate the backing virtual platform
 * device, activate direct MSI injection, then publish the device on the
 * per-VM shadow device list.
 *
 * Returns 0 on success or a negative errno.
 */
int kvm_shadow_dev_create(struct kvm *kvm, struct kvm_master_dev_info *mdi)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct shadow_dev *sdev;
	unsigned long flags;
	int ret;

	if (WARN_ON(!sdev_enable))
		return -EINVAL;

	/* The ioctl path checks this too; stay robust against other callers. */
	if (!mdi->nvectors)
		return -EINVAL;

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev)
		return -ENOMEM;

	sdev->nvecs = mdi->nvectors;

	sdev->msi = kcalloc(sdev->nvecs, sizeof(*sdev->msi), GFP_KERNEL);
	if (!sdev->msi) {
		ret = -ENOMEM;
		goto free_sdev;
	}

	sdev_msi_entry_init(mdi, sdev);
	/* The guest-visible device id is taken from the first MSI entry. */
	sdev->devid = sdev->msi[0].devid;

	sdev->pdev = sdev_virt_pdev_add(sdev->nvecs);
	if (IS_ERR(sdev->pdev)) {
		ret = PTR_ERR(sdev->pdev);
		goto free_sdev_msi;
	}

	ret = sdev_virq_bypass_active(kvm, sdev);
	if (ret)
		goto delete_virtdev;

	sdev->kvm = kvm;
	INIT_WORK(&sdev->destroy, shadow_dev_destroy);

	raw_spin_lock_irqsave(&dist->sdev_list_lock, flags);
	list_add_tail(&sdev->entry, &dist->sdev_list_head);
	raw_spin_unlock_irqrestore(&dist->sdev_list_lock, flags);

	kvm_info("Create shadow device: 0x%x\n", sdev->devid);
	return 0;

delete_virtdev:
	sdev_virt_pdev_delete(sdev->pdev);
free_sdev_msi:
	kfree(sdev->msi);
free_sdev:
	kfree(sdev);
	return ret;
}

/* Unregister the backing virtual platform device of a shadow device. */
static void sdev_virt_pdev_delete(struct platform_device *pdev)
{
	platform_device_unregister(pdev);
}

static void sdev_virq_bypass_deactive(struct kvm *kvm, struct shadow_dev *sdev)
{
	struct kvm_kernel_irq_routing_entry *irq_entries;
	struct msi_desc *desc;
	u32 vec = 0;

	irq_entries = kcalloc(sdev->nvecs,
			      sizeof(struct kvm_kernel_irq_routing_entry),
			      GFP_KERNEL);
	if (!irq_entries)
		return;

	sdev_set_irq_entry(sdev, irq_entries);

	for_each_msi_entry(desc, &sdev->pdev->dev) {
		if (!kvm_vgic_v4_unset_forwarding(kvm, desc->irq,
						  &irq_entries[vec])) {
			clear_bit(vec, sdev->enable);
			sdev->host_irq[vec] = 0;
		} else {
			kvm_err("Shadow device unset (%d) forwarding failed",
				desc->irq);
		}
		vec++;
	}

	kfree(sdev->host_irq);
	kfree(sdev->enable);
	kfree(irq_entries);

	/* FIXME: no error handling */
}

/*
 * Deferred destructor, executed on the cleanup workqueue: unforward all
 * MSIs, remove the backing platform device and free the shadow device.
 */
static void shadow_dev_destroy(struct work_struct *work)
{
	struct shadow_dev *sdev = container_of(work, struct shadow_dev, destroy);

	sdev_virq_bypass_deactive(sdev->kvm, sdev);
	sdev_virt_pdev_delete(sdev->pdev);

	sdev->nvecs = 0;
	kfree(sdev->msi);
	kfree(sdev);
}

/*
 * Unlink the shadow device matching @devid from the VM's list, queue it
 * for deferred destruction and wait for the cleanup work to complete.
 */
void kvm_shadow_dev_delete(struct kvm *kvm, u32 devid)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct shadow_dev *pos, *next;
	unsigned long flags;

	if (WARN_ON(!sdev_enable))
		return;

	raw_spin_lock_irqsave(&dist->sdev_list_lock, flags);
	WARN_ON(list_empty(&dist->sdev_list_head)); /* shouldn't be invoked */

	list_for_each_entry_safe(pos, next, &dist->sdev_list_head, entry) {
		if (pos->devid == devid) {
			list_del(&pos->entry);
			queue_work(sdev_cleanup_wq, &pos->destroy);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&dist->sdev_list_lock, flags);

	flush_workqueue(sdev_cleanup_wq);
}

/*
 * Unlink every shadow device of @kvm, queue them all for deferred
 * destruction and wait for the cleanup work to drain.
 */
void kvm_shadow_dev_delete_all(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct shadow_dev *pos, *next;
	unsigned long flags;

	if (!sdev_enable)
		return;

	raw_spin_lock_irqsave(&dist->sdev_list_lock, flags);
	list_for_each_entry_safe(pos, next, &dist->sdev_list_head, entry) {
		list_del(&pos->entry);
		queue_work(sdev_cleanup_wq, &pos->destroy);
	}
	raw_spin_unlock_irqrestore(&dist->sdev_list_lock, flags);

	flush_workqueue(sdev_cleanup_wq);
}

/*
 * Parse the "kvm-arm.virt_msi_bypass" early parameter.
 * kstrtobool() is the canonical, non-deprecated spelling of strtobool().
 */
static int __init early_virt_msi_bypass(char *buf)
{
	return kstrtobool(buf, &virt_msi_bypass);
}
early_param("kvm-arm.virt_msi_bypass", early_virt_msi_bypass);

/*
 * One-time init: decide whether shadow-device MSI bypass is usable
 * (requested on the command line and GICv4 available) and create the
 * workqueue that the deferred destruction path relies on.
 */
void kvm_shadow_dev_init(void)
{
	/*
	 * FIXME: Ideally shadow device should only rely on a GICv4.0
	 * capable ITS, but we should also take the reserved device ID
	 * pools into account.
	 */
	sdev_enable = virt_msi_bypass && kvm_vgic_global_state.has_gicv4;

	sdev_cleanup_wq = alloc_workqueue("kvm-sdev-cleanup", 0, 0);
	if (!sdev_cleanup_wq)
		sdev_enable = false;

	kvm_info("Shadow device %sabled\n", sdev_enable ? "en" : "dis");
}
+3 −0
Original line number Diff line number Diff line
@@ -62,6 +62,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
		raw_spin_lock_init(lpi_lock);
	}
	raw_spin_lock_init(&dist->lpi_list_lock);

	INIT_LIST_HEAD(&dist->sdev_list_head);
	raw_spin_lock_init(&dist->sdev_list_lock);
}

/* CREATION */
+24 −0
Original line number Diff line number Diff line
@@ -33,6 +33,21 @@
#define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \
			 (irq) <= VGIC_MAX_SPI)

/*
 * A shadow device ties a guest-visible MSI device to a back-end virtual
 * platform device so that its MSIs can be injected directly.
 */
struct shadow_dev {
	struct kvm              *kvm;		/* owning VM */
	struct list_head        entry;		/* link in vgic_dist sdev list */

	u32                     devid;  /* guest visible device id */
	u32                     nvecs;		/* number of MSI vectors */
	unsigned long           *enable;	/* bitmap of vectors with forwarding active */
	int                     *host_irq;	/* per-vector host Linux IRQ numbers */
	struct kvm_msi          *msi;		/* per-vector guest MSI description */

	struct platform_device  *pdev;		/* backing virtual platform device */

	struct work_struct      destroy;	/* deferred teardown work */
};

/* Information about HiSilicon implementation of vtimer (GICv4.1-based) */
struct vtimer_info {
	u32 intid;
@@ -297,6 +312,9 @@ struct vgic_dist {
	 * else.
	 */
	struct its_vm		its_vm;

	raw_spinlock_t		sdev_list_lock;
	struct list_head	sdev_list_head;
};

struct vgic_v2_cpu_if {
@@ -448,4 +466,10 @@ int kvm_vgic_config_vtimer_irqbypass(struct kvm_vcpu *vcpu, u32 vintid,
		bool (*get_as)(struct kvm_vcpu *, int),
		void (*set_as)(struct kvm_vcpu *, int, bool));

extern bool sdev_enable;

void kvm_shadow_dev_init(void);
int kvm_shadow_dev_create(struct kvm *kvm, struct kvm_master_dev_info *mdi);
void kvm_shadow_dev_delete(struct kvm *kvm, u32 devid);
void kvm_shadow_dev_delete_all(struct kvm *kvm);
#endif /* __KVM_ARM_VGIC_H */
Loading