Commit 32e92b71 authored by Marc Zyngier
Browse files

Merge branch kvm-arm64/m1 into kvmarm-master/next

Rework the KVM GIC and timer to cope with lesser HW such as
the Apple M1 SoC.

* kvm-arm64/m1:
  irqchip/apple-aic: Advertise some level of vGICv3 compatibility
  KVM: arm64: timer: Add support for SW-based deactivation
  KVM: arm64: timer: Refactor IRQ configuration
  KVM: arm64: vgic: Implement SW-driven deactivation
  KVM: arm64: vgic: move irq->get_input_level into an ops structure
  KVM: arm64: vgic: Let an interrupt controller advertise lack of HW deactivation
  KVM: arm64: vgic: Be tolerant to the lack of maintenance interrupt masking
  KVM: arm64: Handle physical FIQ as an IRQ while running a guest
  irqchip/gic: Split vGIC probing information from the GIC code
parents 8124c8a6 b6ca556c
Loading
Loading
Loading
Loading
+136 −26
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
@@ -973,36 +974,154 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
	return 0;
}

int kvm_timer_hyp_init(bool has_gic)
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	struct arch_timer_kvm_info *info;
	int err;
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;
	return 0;
}

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
/*
 * irq_set_irqchip_state callback for the KVM timer chip.
 *
 * A request to change the ACTIVE state of an interrupt that is currently
 * forwarded to a vcpu is turned into a mask (val == true) or unmask
 * (val == false) of the parent interrupt, and 0 is returned.
 *
 * Every other request is handed to the parent chip unchanged and its
 * return value is propagated.
 */
static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which == IRQCHIP_STATE_ACTIVE && irqd_is_forwarded_to_vcpu(d)) {
		if (val)
			irq_chip_mask_parent(d);
		else
			irq_chip_unmask_parent(d);

		return 0;
	}

	return irq_chip_set_parent_state(d, which, val);
}

	/* First, do the virtual EL1 timer irq */
/*
 * irq_eoi callback for the KVM timer chip: the EOI is passed to the parent
 * chip only while the interrupt is not forwarded to a vcpu; otherwise it is
 * dropped here.
 */
static void timer_irq_eoi(struct irq_data *d)
{
	if (irqd_is_forwarded_to_vcpu(d))
		return;

	irq_chip_eoi_parent(d);
}

/*
 * irq_ack callback for the KVM timer chip: invoke the parent chip's irq_ack
 * on the parent irq_data, if the parent chip provides one.
 */
static void timer_irq_ack(struct irq_data *d)
{
	struct irq_data *parent = d->parent_data;

	if (!parent->chip->irq_ack)
		return;

	parent->chip->irq_ack(parent);
}

/*
 * irq_chip installed by timer_irq_domain_alloc() for interrupts of the
 * KVM timer domain.
 *
 * mask, unmask and set_type go straight to the parent chip; ack, eoi,
 * set_vcpu_affinity and set_irqchip_state use the timer_irq_* callbacks
 * defined in this file.
 */
static struct irq_chip timer_chip = {
	.name			= "KVM",
	.irq_ack		= timer_irq_ack,
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= timer_irq_eoi,
	.irq_set_type		= irq_chip_set_type_parent,
	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
};

/*
 * irq_domain .alloc callback for the KVM timer domain.
 *
 * @arg carries the hardware interrupt number encoded as a pointer-sized
 * integer. @virq is bound to that hwirq and to timer_chip, with no chip
 * data. @nr_irqs is not used.
 *
 * Returns the result of irq_domain_set_hwirq_and_chip().
 */
static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t timer_hwirq = (uintptr_t)arg;
	int ret;

	ret = irq_domain_set_hwirq_and_chip(domain, virq, timer_hwirq,
					    &timer_chip, NULL);

	return ret;
}

/*
 * irq_domain .free callback for the KVM timer domain.
 *
 * Intentionally empty: timer_irq_domain_alloc() only binds a hwirq and a
 * chip (with NULL chip data) and allocates nothing of its own.
 */
static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

/*
 * Operations of the hierarchical "kvm-timer" irq domain created in
 * kvm_irq_init().
 */
static const struct irq_domain_ops timer_domain_ops = {
	.alloc	= timer_irq_domain_alloc,
	.free	= timer_irq_domain_free,
};

/*
 * irq_ops handed to kvm_vgic_map_phys_irq() for the timer interrupts.
 *
 * Not const: kvm_irq_init() ORs VGIC_IRQ_SW_RESAMPLE into .flags when the
 * vgic reports no_hw_deactivation.
 */
static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

/*
 * Store the trigger type of @virq in @flags.
 *
 * Only level-high and level-low triggers are accepted as-is; any other
 * value is reported with kvm_err() and replaced by IRQF_TRIGGER_LOW.
 */
static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	u32 trigger = irq_get_trigger_type(virq);

	*flags = trigger;
	if (trigger == IRQF_TRIGGER_HIGH || trigger == IRQF_TRIGGER_LOW)
		return;

	kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
		virq);
	*flags = IRQF_TRIGGER_LOW;
}

/*
 * Record the host timer interrupts described by @info and sanitise their
 * trigger flags.
 *
 * When the vgic reports no_hw_deactivation, a "kvm-timer" hierarchical irq
 * domain using timer_domain_ops is created on top of the domain of the
 * virtual timer interrupt, the timer irq_ops are flagged with
 * VGIC_IRQ_SW_RESAMPLE, and the virtual (and, if present, physical) timer
 * interrupts are pushed into that domain.
 *
 * Returns 0 on success, -ENODEV if the virtual timer interrupt is invalid
 * or has no irq_data, and -ENOMEM if the fwnode or the domain cannot be
 * allocated. A failure to push an interrupt into the new domain only
 * triggers a WARN_ON and is not reported to the caller.
 */
static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		/*
		 * Look the irq_data up before allocating anything, so that a
		 * missing descriptor is reported as an error instead of
		 * being dereferenced, and needs no cleanup.
		 */
		data = irq_get_irq_data(host_vtimer_irq);
		if (!data) {
			kvm_err("kvm_arch_timer: no irq_data for virtual timer IRQ: %d\n",
				info->virtual_irq);
			return -ENODEV;
		}

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer in the same parent */
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		/* Only set when the SW-deactivation domain was created above */
		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

	host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
	if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
	    host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
			host_vtimer_irq);
		host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
int kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
@@ -1027,15 +1146,6 @@ int kvm_timer_hyp_init(bool has_gic)
	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
		if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
		    host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
			kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
				host_ptimer_irq);
			host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
		}

		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
@@ -1143,7 +1253,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    map.direct_vtimer->irq.irq,
				    kvm_arch_timer_get_input_level);
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

@@ -1151,7 +1261,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    map.direct_ptimer->irq.irq,
					    kvm_arch_timer_get_input_level);
					    &arch_timer_irq_ops);
	}

	if (ret)
+3 −3
Original line number Diff line number Diff line
@@ -76,6 +76,7 @@ el1_trap:
	b	__guest_exit

el1_irq:
el1_fiq:
	get_vcpu_ptr	x1, x0
	mov	x0, #ARM_EXCEPTION_IRQ
	b	__guest_exit
@@ -131,7 +132,6 @@ SYM_CODE_END(\label)
	invalid_vector	el2t_error_invalid
	invalid_vector	el2h_irq_invalid
	invalid_vector	el2h_fiq_invalid
	invalid_vector	el1_fiq_invalid

	.ltorg

@@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector)

	valid_vect	el1_sync		// Synchronous 64-bit EL1
	valid_vect	el1_irq			// IRQ 64-bit EL1
	invalid_vect	el1_fiq_invalid		// FIQ 64-bit EL1
	valid_vect	el1_fiq			// FIQ 64-bit EL1
	valid_vect	el1_error		// Error 64-bit EL1

	valid_vect	el1_sync		// Synchronous 32-bit EL1
	valid_vect	el1_irq			// IRQ 32-bit EL1
	invalid_vect	el1_fiq_invalid		// FIQ 32-bit EL1
	valid_vect	el1_fiq			// FIQ 32-bit EL1
	valid_vect	el1_error		// Error 32-bit EL1
SYM_CODE_END(__kvm_hyp_vector)

+32 −4
Original line number Diff line number Diff line
@@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
	return IRQ_HANDLED;
}

/* Private copy of the GIC description, filled in by vgic_set_kvm_info(). */
static struct gic_kvm_info *gic_kvm_info;

/*
 * Stash a heap-allocated copy of @info in gic_kvm_info.
 *
 * Must be called at most once while a copy is held: a second call with
 * gic_kvm_info still set hits BUG_ON(). If the allocation fails,
 * gic_kvm_info is left NULL and nothing is reported here.
 */
void __init vgic_set_kvm_info(const struct gic_kvm_info *info)
{
	struct gic_kvm_info *copy;

	BUG_ON(gic_kvm_info != NULL);

	copy = kmalloc(sizeof(*info), GFP_KERNEL);
	if (copy)
		*copy = *info;

	gic_kvm_info = copy;
}

/**
 * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
 *
@@ -509,18 +519,29 @@ void kvm_vgic_init_cpu_hardware(void)
 */
int kvm_vgic_hyp_init(void)
{
	const struct gic_kvm_info *gic_kvm_info;
	bool has_mask;
	int ret;

	gic_kvm_info = gic_get_kvm_info();
	if (!gic_kvm_info)
		return -ENODEV;

	if (!gic_kvm_info->maint_irq) {
	has_mask = !gic_kvm_info->no_maint_irq_mask;

	if (has_mask && !gic_kvm_info->maint_irq) {
		kvm_err("No vgic maintenance irq\n");
		return -ENXIO;
	}

	/*
	 * If we get one of these oddball non-GICs, taint the kernel,
	 * as we have no idea of how they *really* behave.
	 */
	if (gic_kvm_info->no_hw_deactivation) {
		kvm_info("Non-architectural vgic, tainting kernel\n");
		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
		kvm_vgic_global_state.no_hw_deactivation = true;
	}

	switch (gic_kvm_info->type) {
	case GIC_V2:
		ret = vgic_v2_probe(gic_kvm_info);
@@ -536,10 +557,17 @@ int kvm_vgic_hyp_init(void)
		ret = -ENODEV;
	}

	kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;

	kfree(gic_kvm_info);
	gic_kvm_info = NULL;

	if (ret)
		return ret;

	kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
	if (!has_mask)
		return 0;

	ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
				 vgic_maintenance_handler,
				 "vgic", kvm_get_running_vcpus());
+15 −4
Original line number Diff line number Diff line
@@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
		 * If this causes us to lower the level, we have to also clear
		 * the physical active state, since we will otherwise never be
		 * told when the interrupt becomes asserted again.
		 *
		 * Another case is when the interrupt requires a helping hand
		 * on deactivation (no HW deactivation, for example).
		 */
		if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
		if (vgic_irq_is_mapped_level(irq)) {
			bool resample = false;

			if (val & GICH_LR_PENDING_BIT) {
				irq->line_level = vgic_get_phys_line_level(irq);
				resample = !irq->line_level;
			} else if (vgic_irq_needs_resampling(irq) &&
				   !(irq->active || irq->pending_latch)) {
				resample = true;
			}

			if (!irq->line_level)
			if (resample)
				vgic_irq_set_phys_active(irq, false);
		}

@@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
	if (irq->group)
		val |= GICH_LR_GROUP1;

	if (irq->hw) {
	if (irq->hw && !vgic_irq_needs_resampling(irq)) {
		val |= GICH_LR_HW;
		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
		/*
+15 −4
Original line number Diff line number Diff line
@@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
		 * If this causes us to lower the level, we have to also clear
		 * the physical active state, since we will otherwise never be
		 * told when the interrupt becomes asserted again.
		 *
		 * Another case is when the interrupt requires a helping hand
		 * on deactivation (no HW deactivation, for example).
		 */
		if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
		if (vgic_irq_is_mapped_level(irq)) {
			bool resample = false;

			if (val & ICH_LR_PENDING_BIT) {
				irq->line_level = vgic_get_phys_line_level(irq);
				resample = !irq->line_level;
			} else if (vgic_irq_needs_resampling(irq) &&
				   !(irq->active || irq->pending_latch)) {
				resample = true;
			}

			if (!irq->line_level)
			if (resample)
				vgic_irq_set_phys_active(irq, false);
		}

@@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
		}
	}

	if (irq->hw) {
	if (irq->hw && !vgic_irq_needs_resampling(irq)) {
		val |= ICH_LR_HW;
		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
		/*
Loading