Unverified Commit 606f08e8 authored by openeuler-ci-bot, committed by Gitee
Browse files

!275 Intel Advanced Matrix Extensions (AMX) - KVM support

Merge Pull Request from: @Linwang_68f8 
 
 **Content:** 
Intel® Advanced Matrix Extensions (Intel® AMX) is a new 64-bit programming paradigm consisting of two components: a set of 2-dimensional registers (tiles) representing sub-arrays from a larger 2-dimensional memory image, and an accelerator able to operate on tiles. The first implementation of this accelerator is called TMUL (tile matrix multiply unit).

This patch set contains 37 patches that introduce AMX guest support in openEuler.

 **Intel-kernel issue:** 
https://gitee.com/openeuler/intel-kernel/issues/I5RQLJ

 **Test environment:** 
Host: openEuler 22.09 + backported kernel
Guest: openEuler 22.09 + QEMU 7.0 + backported kernel

 **Test cases:** 
Host:
kernel self-test including sigaltstack and AMX state management testing.
TMUL functional testing.
AMX stress.
Context switch testing.
INT8/BF16 online inference.
Guest:
AMX stress.
Context switch testing.
INT8/BF16 online inference.

 **Known issue:** 
N/A

 **Default config change:** 
N/A 
 
Link:https://gitee.com/openeuler/kernel/pulls/275

 
Reviewed-by: Jun Tian <jun.j.tian@intel.com>
Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com>
Reviewed-by: Liu Chao <liuchao173@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 6f8d3b00 7ea464f5
Loading
Loading
Loading
Loading
+46 −3
Original line number Diff line number Diff line
@@ -1514,6 +1514,7 @@ is vcpu 0.

  struct kvm_xsave {
	__u32 region[1024];
	__u32 extra[0];
  };

This ioctl would copy current vcpu's xsave struct to the userspace.
@@ -1522,7 +1523,7 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
4.43 KVM_SET_XSAVE
------------------

:Capability: KVM_CAP_XSAVE
:Capability: KVM_CAP_XSAVE and KVM_CAP_XSAVE2
:Architectures: x86
:Type: vcpu ioctl
:Parameters: struct kvm_xsave (in)
@@ -1533,9 +1534,18 @@ This ioctl would copy current vcpu's xsave struct to the userspace.

  struct kvm_xsave {
	__u32 region[1024];
	__u32 extra[0];
  };

This ioctl would copy userspace's xsave struct to the kernel.
This ioctl would copy userspace's xsave struct to the kernel. It copies
as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2),
when invoked on the vm file descriptor. The size value returned by
KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
Currently, it is only greater than 4096 if a dynamic feature has been
enabled with ``arch_prctl()``, but this may change in the future.

The offsets of the state save areas in struct kvm_xsave follow the
contents of CPUID leaf 0xD on the host.


4.44 KVM_GET_XCRS
@@ -1632,6 +1642,10 @@ userspace capabilities, and with user requirements (for example, the
user may wish to constrain cpuid to emulate older hardware, or for
feature consistency across a cluster).

Dynamically-enabled feature bits need to be requested with
``arch_prctl()`` before calling this ioctl. Feature bits that have not
been requested are excluded from the result.

Note that certain capabilities, such as KVM_CAP_X86_DISABLE_EXITS, may
expose cpuid features (e.g. MONITOR) which are not supported by kvm in
its default configuration. If userspace enables such capabilities, it
@@ -3181,6 +3195,7 @@ number.

:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
             KVM_CAP_VCPU_ATTRIBUTES for vcpu device,
             KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set)
:Type: device ioctl, vm ioctl, vcpu ioctl
:Parameters: struct kvm_device_attr
:Returns: 0 on success, -1 on error
@@ -3216,6 +3231,7 @@ transferred is defined by the particular attribute.

:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
             KVM_CAP_VCPU_ATTRIBUTES for vcpu device,
             KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device
:Type: device ioctl, vm ioctl, vcpu ioctl
:Parameters: struct kvm_device_attr
:Returns: 0 on success, -1 on error
@@ -4979,6 +4995,33 @@ KVM does guarantee that vCPUs will see either the previous filter or the new
filter, e.g. MSRs with identical settings in both the old and new filter will
have deterministic behavior.

4.134 KVM_GET_XSAVE2
--------------------

:Capability: KVM_CAP_XSAVE2
:Architectures: x86
:Type: vcpu ioctl
:Parameters: struct kvm_xsave (out)
:Returns: 0 on success, -1 on error


::

  struct kvm_xsave {
        __u32 region[1024];
        __u32 extra[0];
  };

This ioctl would copy current vcpu's xsave struct to the userspace. It
copies as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
when invoked on the vm file descriptor. The size value returned by
KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
Currently, it is only greater than 4096 if a dynamic feature has been
enabled with ``arch_prctl()``, but this may change in the future.

The offsets of the state save areas in struct kvm_xsave follow the contents
of CPUID leaf 0xD on the host.


5. The kvm_run structure
========================
+11 −0
Original line number Diff line number Diff line
@@ -111,10 +111,21 @@ static inline void fpstate_free(struct fpu *fpu) { }
/* fpstate-related functions which are exported to KVM */
extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);

extern u64 xstate_get_guest_group_perm(void);

/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures);

/*
 * Guest XFD helpers.
 *
 * With CONFIG_X86_64 set, only the prototypes are declared here and the
 * definitions live elsewhere. Without it, both helpers are empty inline
 * stubs that do nothing (presumably so that callers do not need their own
 * #ifdef -- confirm against the call sites).
 */
#ifdef CONFIG_X86_64
extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd);
extern void fpu_sync_guest_vmexit_xfd_state(void);
#else
/* No-op stubs used when CONFIG_X86_64 is not set. */
static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { }
static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
#endif

extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
+32 −0
Original line number Diff line number Diff line
@@ -389,6 +389,8 @@ struct fpstate {
	/* @regs is dynamically sized! Don't add anything after @regs! */
} __aligned(64);

#define FPU_GUEST_PERM_LOCKED		BIT_ULL(63)

struct fpu_state_perm {
	/*
	 * @__state_perm:
@@ -478,6 +480,13 @@ struct fpu {
	 */
	KABI_EXTEND(struct fpu_state_perm perm)

	/*
	 * @guest_perm:
	 *
	 * Permission related information for guest pseudo FPUs
	 */
	KABI_EXTEND(struct fpu_state_perm guest_perm)

	/*
	 * @__fpstate:
	 *
@@ -498,6 +507,29 @@ struct fpu {
 * Guest pseudo FPU container
 */
struct fpu_guest {
	/*
	 * @xfeatures:			xfeature bitmap of features which are
	 *				currently enabled for the guest vCPU.
	 */
	u64				xfeatures;

	/*
	 * @perm:			xfeature bitmap of features which are
	 *				permitted to be enabled for the guest
	 *				vCPU.
	 */
	u64				perm;

	/*
	 * @xfd_err:			Save the guest value.
	 */
	u64				xfd_err;

	/*
	 * @uabi_size:			Size required for save/restore
	 */
	unsigned int			uabi_size;

	/*
	 * @fpstate:			Pointer to the allocated guest fpstate
	 */
+1 −0
Original line number Diff line number Diff line
@@ -575,6 +575,7 @@ struct kvm_vcpu_arch {
	bool at_instruction_boundary;
	bool tpr_access_reporting;
	bool xsaves_enabled;
	bool xfd_no_write_intercept;
	u64 ia32_xss;
	u64 microcode_version;
	u64 arch_capabilities;
+18 −1
Original line number Diff line number Diff line
@@ -362,9 +362,23 @@ struct kvm_debugregs {
	__u64 reserved[9];
};

/* for KVM_CAP_XSAVE */
/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
	/*
	 * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
	 * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
	 * respectively, when invoked on the vm file descriptor.
	 *
	 * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
	 * will always be at least 4096. Currently, it is only greater
	 * than 4096 if a dynamic feature has been enabled with
	 * ``arch_prctl()``, but this may change in the future.
	 *
	 * The offsets of the state save areas in struct kvm_xsave follow
	 * the contents of CPUID leaf 0xD on the host.
	 */
	__u32 region[1024];
	/*
	 * Variable-length tail that follows the fixed 4096-byte region.
	 * Declared as a C99 flexible array member, so it contributes
	 * nothing to sizeof(struct kvm_xsave).
	 */
	__u32 extra[];
};

#define KVM_MAX_XCRS	16
@@ -427,6 +441,9 @@ struct kvm_sync_regs {

#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE	0x00000001

/* attributes for system fd (group 0) */
#define KVM_X86_XCOMP_GUEST_SUPP	0

struct kvm_vmx_nested_state_data {
	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
Loading