Commit d8ba8ba4 authored by David Woodhouse, committed by Paolo Bonzini
Browse files

KVM: x86/xen: Allow XEN_RUNSTATE_UPDATE flag behaviour to be configured



Closer inspection of the Xen code shows that we aren't supposed to be
using the XEN_RUNSTATE_UPDATE flag unconditionally. It should be
explicitly enabled by guests through the HYPERVISOR_vm_assist hypercall.
If we randomly set the top bit of ->state_entry_time for a guest that
hasn't asked for it and doesn't expect it, that could make the runtimes
fail to add up and confuse the guest. Without the flag it's perfectly
safe for a vCPU to read its own vcpu_runstate_info; just not for one
vCPU to read *another's*.

I briefly pondered adding a word for the whole set of VMASST_TYPE_*
flags but the only one we care about for HVM guests is this, so it
seemed a bit pointless.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20221127122210.248427-3-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 5ec3289b
Loading
Loading
Loading
Loading
+28 −6
Original line number Diff line number Diff line
@@ -5339,6 +5339,7 @@ KVM_PV_ASYNC_CLEANUP_PERFORM
	union {
		__u8 long_mode;
		__u8 vector;
		__u8 runstate_update_flag;
		struct {
			__u64 gfn;
		} shared_info;
@@ -5416,6 +5417,14 @@ KVM_XEN_ATTR_TYPE_XEN_VERSION
  event channel delivery, so responding within the kernel without
  exiting to userspace is beneficial.

KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the
  XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read
  other vCPUs' vcpu_runstate_info. Xen guests enable this feature via
  the VMASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
  hypercall.

4.127 KVM_XEN_HVM_GET_ATTR
--------------------------

@@ -8065,6 +8074,7 @@ PVHVM guests. Valid flags are::
  #define KVM_XEN_HVM_CONFIG_RUNSTATE			(1 << 3)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL		(1 << 4)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND		(1 << 5)
  #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG	(1 << 6)

The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
@@ -8096,6 +8106,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes.
related to event channel delivery, timers, and the XENVER_version
interception.

The KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG flag indicates that KVM supports
the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute in the KVM_XEN_SET_ATTR
and KVM_XEN_GET_ATTR ioctls. This controls whether KVM will set the
XEN_RUNSTATE_UPDATE flag in guest memory mapped vcpu_runstate_info during
updates of the runstate information. Note that versions of KVM which support
the RUNSTATE feature above, but not the RUNSTATE_UPDATE_FLAG feature, will
always set the XEN_RUNSTATE_UPDATE flag when updating the guest structure,
which is perhaps counterintuitive. When this flag is advertised, KVM will
behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless
specifically enabled (by the guest making the hypercall, causing the VMM
to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute).

8.31 KVM_CAP_PPC_MULTITCE
-------------------------

+1 −0
Original line number Diff line number Diff line
@@ -1113,6 +1113,7 @@ struct msr_bitmap_range {
struct kvm_xen {
	u32 xen_version;
	bool long_mode;
	bool runstate_update_flag;
	u8 upcall_vector;
	struct gfn_to_pfn_cache shinfo_cache;
	struct idr evtchn_ports;
+2 −1
Original line number Diff line number Diff line
@@ -4431,7 +4431,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
		    KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
		    KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
		if (sched_info_on())
			r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
			r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
			     KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
		break;
#endif
	case KVM_CAP_SYNC_REGS:
+44 −13
Original line number Diff line number Diff line
@@ -179,7 +179,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
	struct vcpu_runstate_info rs;
	unsigned long flags;
	size_t times_ofs;
	uint8_t *update_bit;
	uint8_t *update_bit = NULL;
	uint64_t entry_time;
	uint64_t *rs_times;
	int *rs_state;

@@ -297,6 +298,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
		 */
		rs_state = gpc1->khva;
		rs_times = gpc1->khva + times_ofs;
		if (v->kvm->arch.xen.runstate_update_flag)
			update_bit = ((void *)(&rs_times[1])) - 1;
	} else {
		/*
@@ -351,12 +353,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
		 * The update_bit is still directly in the guest memory,
		 * via one GPC or the other.
		 */
		if (v->kvm->arch.xen.runstate_update_flag) {
			if (user_len1 >= times_ofs + sizeof(uint64_t))
				update_bit = gpc1->khva + times_ofs +
					sizeof(uint64_t) - 1;
			else
				update_bit = gpc2->khva + times_ofs +
					sizeof(uint64_t) - 1 - user_len1;
		}

#ifdef CONFIG_X86_64
		/*
@@ -376,8 +380,12 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
	 * different cache line to the rest of the 64-bit word, due to
	 * the (lack of) alignment constraints.
	 */
	entry_time = vx->runstate_entry_time;
	if (update_bit) {
		entry_time |= XEN_RUNSTATE_UPDATE;
		*update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
		smp_wmb();
	}

	/*
	 * Now assemble the actual structure, either on our kernel stack
@@ -385,7 +393,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
	 * rs_times pointers were set up above.
	 */
	*rs_state = vx->current_runstate;
	rs_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
	rs_times[0] = entry_time;
	memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times));

	/* For the split case, we have to then copy it to the guest. */
@@ -396,8 +404,11 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
	smp_wmb();

	/* Finally, clear the XEN_RUNSTATE_UPDATE bit. */
	*update_bit = vx->runstate_entry_time >> 56;
	if (update_bit) {
		entry_time &= ~XEN_RUNSTATE_UPDATE;
		*update_bit = entry_time >> 56;
		smp_wmb();
	}

	if (user_len2)
		read_unlock(&gpc2->lock);
@@ -619,6 +630,17 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		mutex_lock(&kvm->lock);
		kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
		mutex_unlock(&kvm->lock);
		r = 0;
		break;

	default:
		break;
	}
@@ -656,6 +678,15 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag;
		r = 0;
		break;

	default:
		break;
	}
+4 −0
Original line number Diff line number Diff line
@@ -1271,6 +1271,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_RUNSTATE		(1 << 3)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND		(1 << 5)
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG	(1 << 6)

struct kvm_xen_hvm_config {
	__u32 flags;
@@ -1776,6 +1777,7 @@ struct kvm_xen_hvm_attr {
	union {
		__u8 long_mode;
		__u8 vector;
		__u8 runstate_update_flag;
		struct {
			__u64 gfn;
		} shared_info;
@@ -1816,6 +1818,8 @@ struct kvm_xen_hvm_attr {
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
#define KVM_XEN_ATTR_TYPE_EVTCHN		0x3
#define KVM_XEN_ATTR_TYPE_XEN_VERSION		0x4
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG	0x5

/* Per-vCPU Xen attributes */
#define KVM_XEN_VCPU_GET_ATTR	_IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
Loading