Commit fcb732d8 authored by David Woodhouse, committed by Paolo Bonzini

KVM: x86/xen: Fix runstate updates to be atomic when preempting vCPU



There are circumstances when kvm_xen_update_runstate_guest() should not
sleep because it ends up being called from __schedule() when the vCPU
is preempted:

[  222.830825]  kvm_xen_update_runstate_guest+0x24/0x100
[  222.830878]  kvm_arch_vcpu_put+0x14c/0x200
[  222.830920]  kvm_sched_out+0x30/0x40
[  222.830960]  __schedule+0x55c/0x9f0

To handle this, make it use the same trick as __kvm_xen_has_interrupt(),
of using the hva from the gfn_to_hva_cache directly. Then it can use
pagefault_disable() around the accesses and just bail out if the page
is absent (which is unlikely).
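
In outline, the trick looks like this (an illustrative fragment reusing
the variable names from the diff below, not the committed code verbatim):

	if (atomic)
		pagefault_disable();	/* we may be deep in __schedule() */

	/*
	 * With page faults disabled, __put_user() returns -EFAULT instead
	 * of sleeping when the target page is not present, and the update
	 * is simply skipped for this transition.
	 */
	if (__put_user(state_entry_time, user_times))
		goto out;
	smp_wmb();
	if (__put_user(vx->current_runstate, user_state))
		goto out;
 out:
	if (atomic)
		pagefault_enable();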

I almost switched to using a gfn_to_pfn_cache here and bailing out if
kvm_map_gfn() fails, like kvm_steal_time_set_preempted() does, but on
closer inspection it looks like kvm_map_gfn() will *always* fail in
atomic context for a page in IOMEM, which means it would silently fail
to make the update every single time for such guests, AFAICT. So I
didn't do it that way after all, and will probably fix that other case too.

Cc: stable@vger.kernel.org
Fixes: 30b5c851 ("KVM: x86/xen: Add support for vCPU runstate information")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <b17a93e5ff4561e57b1238e3e7ccd0b613eb827e.camel@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 39150352
arch/x86/kvm/xen.c: +67 −30
@@ -133,32 +133,57 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
 void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 {
 	struct kvm_vcpu_xen *vx = &v->arch.xen;
+	struct gfn_to_hva_cache *ghc = &vx->runstate_cache;
+	struct kvm_memslots *slots = kvm_memslots(v->kvm);
+	bool atomic = (state == RUNSTATE_runnable);
 	uint64_t state_entry_time;
-	unsigned int offset;
+	int __user *user_state;
+	uint64_t __user *user_times;
 
 	kvm_xen_update_runstate(v, state);
 
 	if (!vx->runstate_set)
 		return;
 
-	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+	if (unlikely(slots->generation != ghc->generation || kvm_is_error_hva(ghc->hva)) &&
+	    kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len))
+		return;
+
+	/* We made sure it fits in a single page */
+	BUG_ON(!ghc->memslot);
+
+	if (atomic)
+		pagefault_disable();
 
-	offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
-#ifdef CONFIG_X86_64
 	/*
-	 * The only difference is alignment of uint64_t in 32-bit.
-	 * So the first field 'state' is accessed directly using
-	 * offsetof() (where its offset happens to be zero), while the
-	 * remaining fields which are all uint64_t, start at 'offset'
-	 * which we tweak here by adding 4.
+	 * The only difference between 32-bit and 64-bit versions of the
+	 * runstate struct is the alignment of uint64_t in 32-bit, which
+	 * means that the 64-bit version has an additional 4 bytes of
+	 * padding after the first field 'state'.
+	 *
+	 * So we use 'int __user *user_state' to point to the state field,
+	 * and 'uint64_t __user *user_times' for runstate_entry_time. So
+	 * the actual array of time[] in each state starts at user_times[1].
 	 */
+	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
+	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
+	user_state = (int __user *)ghc->hva;
+
+	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+	user_times = (uint64_t __user *)(ghc->hva +
+					 offsetof(struct compat_vcpu_runstate_info,
+						  state_entry_time));
+#ifdef CONFIG_X86_64
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
 		     offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
 		     offsetof(struct compat_vcpu_runstate_info, time) + 4);
 
 	if (v->kvm->arch.xen.long_mode)
-		offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+		user_times = (uint64_t __user *)(ghc->hva +
+						 offsetof(struct vcpu_runstate_info,
+							  state_entry_time));
 #endif
 	/*
 	 * First write the updated state_entry_time at the appropriate
@@ -172,10 +197,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
 		     sizeof(state_entry_time));
 
-	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
-					  &state_entry_time, offset,
-					  sizeof(state_entry_time)))
-		return;
+	if (__put_user(state_entry_time, user_times))
+		goto out;
 	smp_wmb();
 
 	/*
@@ -189,11 +212,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
 		     sizeof(vx->current_runstate));
 
-	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
-					  &vx->current_runstate,
-					  offsetof(struct vcpu_runstate_info, state),
-					  sizeof(vx->current_runstate)))
-		return;
+	if (__put_user(vx->current_runstate, user_state))
+		goto out;
 
 	/*
 	 * Write the actual runstate times immediately after the
@@ -208,24 +228,23 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
 		     sizeof(vx->runstate_times));
 
-	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
-					  &vx->runstate_times[0],
-					  offset + sizeof(u64),
-					  sizeof(vx->runstate_times)))
-		return;
-
+	if (__copy_to_user(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)))
+		goto out;
 	smp_wmb();
+
 	/*
 	 * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
 	 * runstate_entry_time field.
 	 */
-
 	state_entry_time &= ~XEN_RUNSTATE_UPDATE;
-	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
-					  &state_entry_time, offset,
-					  sizeof(state_entry_time)))
-		return;
+	__put_user(state_entry_time, user_times);
 	smp_wmb();
+
+ out:
+	mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+
+	if (atomic)
+		pagefault_enable();
 }
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
@@ -443,6 +462,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			break;
 		}
 
+		/* It must fit within a single page */
+		if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) {
+			r = -EINVAL;
+			break;
+		}
+
 		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
 					      &vcpu->arch.xen.vcpu_info_cache,
 					      data->u.gpa,
@@ -460,6 +485,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			break;
 		}
 
+		/* It must fit within a single page */
+		if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct pvclock_vcpu_time_info) > PAGE_SIZE) {
+			r = -EINVAL;
+			break;
+		}
+
 		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
 					      &vcpu->arch.xen.vcpu_time_info_cache,
 					      data->u.gpa,
@@ -481,6 +512,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			break;
 		}
 
+		/* It must fit within a single page */
+		if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE) {
+			r = -EINVAL;
+			break;
+		}
+
 		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
 					      &vcpu->arch.xen.runstate_cache,
 					      data->u.gpa,
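
The three new checks in kvm_xen_vcpu_set_attr() above enforce one
invariant: now that updates go through a single cached hva, each shared
structure must sit entirely within one guest page. The arithmetic is
just the object's offset within its page plus its size. A standalone
illustration (userspace C, assuming 4 KiB pages as on x86;
fits_in_one_page() is a made-up helper, not a kernel function):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE 4096ULL
	#define PAGE_MASK (~(PAGE_SIZE - 1))

	/* Same arithmetic as the new kernel checks: offset-within-page
	 * plus size must not spill past the end of the page. */
	static int fits_in_one_page(uint64_t gpa, uint64_t len)
	{
		return (gpa & ~PAGE_MASK) + len <= PAGE_SIZE;
	}

	int main(void)
	{
		/* 0x2c bytes (sizeof(struct compat_vcpu_runstate_info),
		 * per the BUILD_BUG_ON in the diff) at page offset 0xfe0
		 * end at 0x100c, crossing into the next page: such a GPA
		 * would now be rejected with -EINVAL. */
		printf("%d\n", fits_in_one_page(0x10fe0, 0x2c)); /* 0 */

		/* At page offset 0xf00 the same object ends at 0xf2c
		 * and is accepted. */
		printf("%d\n", fits_in_one_page(0x10f00, 0x2c)); /* 1 */
		return 0;
	}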