Commit a4cfff3f authored by Paolo Bonzini
Browse files

Merge branch 'kvm-older-features' into HEAD



Merge branch for features that did not make it into 5.18:

* New ioctls to get/set TSC frequency for a whole VM

* Allow userspace to opt out of hypercall patching

Nested virtualization improvements for AMD:

* Support for "nested nested" optimizations (nested vVMLOAD/VMSAVE,
  nested vGIF)

* Allow AVIC to co-exist with a nested guest running

* Fixes for LBR virtualizations when a nested guest is running,
  and nested LBR virtualization support

* PAUSE filtering for nested hypervisors

Guest support:

* Decoupling of vcpu_is_preempted from PV spinlocks

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parents 42dcbe7d 8d5678a7
Loading
Loading
Loading
Loading
+135 −14
Original line number Diff line number Diff line
@@ -982,12 +982,22 @@ memory.
	__u8 pad2[30];
  };

If the KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag is returned from the
KVM_CAP_XEN_HVM check, it may be set in the flags field of this ioctl.
This requests KVM to generate the contents of the hypercall page
automatically; hypercalls will be intercepted and passed to userspace
through KVM_EXIT_XEN.  In this case, all of the blob size and address
fields must be zero.
If certain flags are returned from the KVM_CAP_XEN_HVM check, they may
be set in the flags field of this ioctl:

The KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag requests KVM to generate
the contents of the hypercall page automatically; hypercalls will be
intercepted and passed to userspace through KVM_EXIT_XEN.  In this
case, all of the blob size and address fields must be zero.

The KVM_XEN_HVM_CONFIG_EVTCHN_SEND flag indicates to KVM that userspace
will always use the KVM_XEN_HVM_EVTCHN_SEND ioctl to deliver event
channel interrupts rather than manipulating the guest's shared_info
structures directly. This, in turn, may allow KVM to enable features
such as intercepting the SCHEDOP_poll hypercall to accelerate PV
spinlock operation for the guest. Userspace may still use the ioctl
to deliver events if it was advertised, even if userspace does not
send this indication that it will always do so.

No other flags are currently valid in the struct kvm_xen_hvm_config.

@@ -1887,22 +1897,25 @@ the future.
4.55 KVM_SET_TSC_KHZ
--------------------

:Capability: KVM_CAP_TSC_CONTROL
:Capability: KVM_CAP_TSC_CONTROL / KVM_CAP_VM_TSC_CONTROL
:Architectures: x86
:Type: vcpu ioctl
:Type: vcpu ioctl / vm ioctl
:Parameters: virtual tsc_khz
:Returns: 0 on success, -1 on error

Specifies the tsc frequency for the virtual machine. The unit of the
frequency is KHz.

If the KVM_CAP_VM_TSC_CONTROL capability is advertised, this can also
be used as a vm ioctl to set the initial tsc frequency of subsequently
created vCPUs.

4.56 KVM_GET_TSC_KHZ
--------------------

:Capability: KVM_CAP_GET_TSC_KHZ
:Capability: KVM_CAP_GET_TSC_KHZ / KVM_CAP_VM_TSC_CONTROL
:Architectures: x86
:Type: vcpu ioctl
:Type: vcpu ioctl / vm ioctl
:Parameters: none
:Returns: virtual tsc-khz on success, negative value on error

@@ -5216,7 +5229,25 @@ have deterministic behavior.
		struct {
			__u64 gfn;
		} shared_info;
		__u64 pad[4];
		struct {
			__u32 send_port;
			__u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
			__u32 flags;
			union {
				struct {
					__u32 port;
					__u32 vcpu;
					__u32 priority;
				} port;
				struct {
					__u32 port; /* Zero for eventfd */
					__s32 fd;
				} eventfd;
				__u32 padding[4];
			} deliver;
		} evtchn;
		__u32 xen_version;
		__u64 pad[8];
	} u;
  };

@@ -5247,6 +5278,30 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO

KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
  Sets the exception vector used to deliver Xen event channel upcalls.
  This is the HVM-wide vector injected directly by the hypervisor
  (not through the local APIC), typically configured by a guest via
  HVM_PARAM_CALLBACK_IRQ.

KVM_XEN_ATTR_TYPE_EVTCHN
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures
  an outbound port number for interception of EVTCHNOP_send requests
  from the guest. A given sending port number may be directed back
  to a specified vCPU (by APIC ID) / port / priority on the guest,
  or to trigger events on an eventfd. The vCPU and priority can be
  changed by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call,
  but other fields cannot change for a given sending port. A port
  mapping is removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags
  field.

KVM_XEN_ATTR_TYPE_XEN_VERSION
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures
  the 32-bit version code returned to the guest when it invokes the
  XENVER_version call; typically (XEN_MAJOR << 16 | XEN_MINOR). PV
  Xen guests will often use this as a dummy hypercall to trigger
  event channel delivery, so responding within the kernel without
  exiting to userspace is beneficial.

4.127 KVM_XEN_HVM_GET_ATTR
--------------------------
@@ -5258,7 +5313,8 @@ KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
:Returns: 0 on success, < 0 on error

Allows Xen VM attributes to be read. For the structure and types,
see KVM_XEN_HVM_SET_ATTR above.
see KVM_XEN_HVM_SET_ATTR above. The KVM_XEN_ATTR_TYPE_EVTCHN
attribute cannot be read.

4.128 KVM_XEN_VCPU_SET_ATTR
---------------------------
@@ -5285,6 +5341,13 @@ see KVM_XEN_HVM_SET_ATTR above.
			__u64 time_blocked;
			__u64 time_offline;
		} runstate;
		__u32 vcpu_id;
		struct {
			__u32 port;
			__u32 priority;
			__u64 expires_ns;
		} timer;
		__u8 vector;
	} u;
  };

@@ -5326,6 +5389,27 @@ KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
  or RUNSTATE_offline) to set the current accounted state as of the
  adjusted state_entry_time.

KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the Xen
  vCPU ID of the given vCPU, to allow timer-related VCPU operations to
  be intercepted by KVM.

KVM_XEN_VCPU_ATTR_TYPE_TIMER
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the
  event channel port/priority for the VIRQ_TIMER of the vCPU, as well
  as allowing a pending timer to be saved/restored.

KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the
  per-vCPU local APIC upcall vector, configured by a Xen guest with
  the HVMOP_set_evtchn_upcall_vector hypercall. This is typically
  used by Windows guests, and is distinct from the HVM-wide upcall
  vector configured with HVM_PARAM_CALLBACK_IRQ.


4.129 KVM_XEN_VCPU_GET_ATTR
---------------------------

@@ -5645,6 +5729,25 @@ enabled with ``arch_prctl()``, but this may change in the future.
The offsets of the state save areas in struct kvm_xsave follow the contents
of CPUID leaf 0xD on the host.

4.135 KVM_XEN_HVM_EVTCHN_SEND
-----------------------------

:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_irq_routing_xen_evtchn
:Returns: 0 on success, < 0 on error


::

   struct kvm_irq_routing_xen_evtchn {
	__u32 port;
	__u32 vcpu;
	__u32 priority;
   };

This ioctl injects an event channel interrupt directly to the guest vCPU.

5. The kvm_run structure
========================
@@ -7135,6 +7238,15 @@ The valid bits in cap.args[0] are:
                                    Additionally, when this quirk is disabled,
                                    KVM clears CPUID.01H:ECX[bit 3] if
                                    IA32_MISC_ENABLE[bit 18] is cleared.

 KVM_X86_QUIRK_FIX_HYPERCALL_INSN   By default, KVM rewrites guest
                                    VMMCALL/VMCALL instructions to match the
                                    vendor's hypercall instruction for the
                                    system. When this quirk is disabled, KVM
                                    will no longer rewrite invalid guest
                                    hypercall instructions. Executing the
                                    incorrect hypercall instruction will
                                    generate a #UD within the guest.
=================================== ============================================

8. Other capabilities.
@@ -7612,8 +7724,9 @@ PVHVM guests. Valid flags are::
  #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR	(1 << 0)
  #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL	(1 << 1)
  #define KVM_XEN_HVM_CONFIG_SHARED_INFO	(1 << 2)
  #define KVM_XEN_HVM_CONFIG_RUNSTATE		(1 << 2)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 3)
  #define KVM_XEN_HVM_CONFIG_RUNSTATE		(1 << 3)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 4)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND	(1 << 5)

The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
@@ -7637,6 +7750,14 @@ The KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL flag indicates that IRQ routing entries
of the type KVM_IRQ_ROUTING_XEN_EVTCHN are supported, with the priority
field set to indicate 2 level event channel delivery.

The KVM_XEN_HVM_CONFIG_EVTCHN_SEND flag indicates that KVM supports
injecting event channel events directly into the guest with the
KVM_XEN_HVM_EVTCHN_SEND ioctl. It also indicates support for the
KVM_XEN_ATTR_TYPE_EVTCHN/XEN_VERSION HVM attributes and the
KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes
related to event channel delivery, timers, and the XENVER_version
interception.

8.31 KVM_CAP_PPC_MULTITCE
-------------------------

+1 −0
Original line number Diff line number Diff line
@@ -126,6 +126,7 @@ KVM_X86_OP_OPTIONAL(migrate_timers)
KVM_X86_OP(msr_filter_changed)
KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);

#undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL
+25 −9
Original line number Diff line number Diff line
@@ -607,16 +607,21 @@ struct kvm_vcpu_hv {
struct kvm_vcpu_xen {
	u64 hypercall_rip;
	u32 current_runstate;
	bool vcpu_info_set;
	bool vcpu_time_info_set;
	bool runstate_set;
	struct gfn_to_hva_cache vcpu_info_cache;
	struct gfn_to_hva_cache vcpu_time_info_cache;
	struct gfn_to_hva_cache runstate_cache;
	u8 upcall_vector;
	struct gfn_to_pfn_cache vcpu_info_cache;
	struct gfn_to_pfn_cache vcpu_time_info_cache;
	struct gfn_to_pfn_cache runstate_cache;
	u64 last_steal;
	u64 runstate_entry_time;
	u64 runstate_times[4];
	unsigned long evtchn_pending_sel;
	u32 vcpu_id; /* The Xen / ACPI vCPU ID */
	u32 timer_virq;
	u64 timer_expires; /* In guest epoch */
	atomic_t timer_pending;
	struct hrtimer timer;
	int poll_evtchn;
	struct timer_list poll_timer;
};

struct kvm_vcpu_arch {
@@ -753,8 +758,7 @@ struct kvm_vcpu_arch {
	gpa_t time;
	struct pvclock_vcpu_time_info hv_clock;
	unsigned int hw_tsc_khz;
	struct gfn_to_hva_cache pv_time;
	bool pv_time_enabled;
	struct gfn_to_pfn_cache pv_time;
	/* set guest stopped flag in pvclock flags field */
	bool pvclock_set_guest_stopped_request;

@@ -1024,9 +1028,12 @@ struct msr_bitmap_range {

/* Xen emulation context */
struct kvm_xen {
	u32 xen_version;
	bool long_mode;
	u8 upcall_vector;
	struct gfn_to_pfn_cache shinfo_cache;
	struct idr evtchn_ports;
	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
};

enum kvm_irqchip_mode {
@@ -1119,6 +1126,8 @@ struct kvm_arch {
	u64 cur_tsc_generation;
	int nr_vcpus_matched_tsc;

	u32 default_tsc_khz;

	seqcount_raw_spinlock_t pvclock_sc;
	bool use_master_clock;
	u64 master_kernel_ns;
@@ -1498,6 +1507,11 @@ struct kvm_x86_ops {
	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);

	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);

	/*
	 * Returns vCPU specific APICv inhibit reasons
	 */
	unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
};

struct kvm_x86_nested_ops {
@@ -1799,6 +1813,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
				struct x86_exception *exception);

bool kvm_apicv_activated(struct kvm *kvm);
bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
				      enum kvm_apicv_inhibit reason, bool set);
@@ -1988,6 +2003,7 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
	 KVM_X86_QUIRK_CD_NW_CLEARED |		\
	 KVM_X86_QUIRK_LAPIC_MMIO_HOLE |	\
	 KVM_X86_QUIRK_OUT_7E_INC_RIP |		\
	 KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)
	 KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT |	\
	 KVM_X86_QUIRK_FIX_HYPERCALL_INSN)

#endif /* _ASM_X86_KVM_HOST_H */
+6 −5
Original line number Diff line number Diff line
@@ -433,6 +433,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE		(1 << 2)
#define KVM_X86_QUIRK_OUT_7E_INC_RIP		(1 << 3)
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT	(1 << 4)
#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN	(1 << 5)

#define KVM_STATE_NESTED_FORMAT_VMX	0
#define KVM_STATE_NESTED_FORMAT_SVM	1
+2 −2
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@

#include <asm/ia32.h>

#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#if defined(CONFIG_KVM_GUEST)
#include <asm/kvm_para.h>
#endif

@@ -20,7 +20,7 @@ int main(void)
	BLANK();
#endif

#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#if defined(CONFIG_KVM_GUEST)
	OFFSET(KVM_STEAL_TIME_preempted, kvm_steal_time, preempted);
	BLANK();
#endif
Loading