Merge tag 'hyperv-fixes-signed-20221125' of... (081f359e) · Commits · EulixOS / Software / Kernel

arch/x86/hyperv/hv_init.c

+26 −28

Original line number	Diff line number	Diff line
		@@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
		static int hv_cpu_init(unsigned int cpu)
		{
		union hv_vp_assist_msr_contents msr = { 0 };
		struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
		struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
		int ret;

		ret = hv_common_cpu_init(cpu);
		@@ -87,15 +87,13 @@ static int hv_cpu_init(unsigned int cpu)
		if (!hv_vp_assist_page)
		return 0;

		if (!*hvp) {
		if (hv_root_partition) {
		/*
		* For root partition we get the hypervisor provided VP assist
		* page, instead of allocating a new page.
		*/
		rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
		*hvp = memremap(msr.pfn <<
		HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
		*hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
		PAGE_SIZE, MEMREMAP_WB);
		} else {
		/*
		@@ -106,16 +104,16 @@ static int hv_cpu_init(unsigned int cpu)
		* in hv_cpu_die(), otherwise a CPU may not be stopped in the
		* case of CPU offlining and the VM will hang.
		*/
		if (!*hvp)
		*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL \| __GFP_ZERO);
		if (*hvp)
		msr.pfn = vmalloc_to_pfn(*hvp);

		}
		WARN_ON(!(*hvp));
		if (*hvp) {
		if (!WARN_ON(!(*hvp))) {
		msr.enable = 1;
		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
		}
		}

		return hyperv_init_ghcb();
		}

drivers/hv/channel_mgmt.c

+5 −1

Original line number	Diff line number	Diff line
		@@ -533,13 +533,17 @@ static void vmbus_add_channel_work(struct work_struct *work)
		* Add the new device to the bus. This will kick off device-driver
		* binding which eventually invokes the device driver's AddDevice()
		* method.
		*
		* If vmbus_device_register() fails, the 'device_obj' is freed in
		* vmbus_device_release() as called by device_unregister() in the
		* error path of vmbus_device_register(). In the outside error
		* path, there's no need to free it.
		*/
		ret = vmbus_device_register(newchannel->device_obj);

		if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
		newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
		}

drivers/hv/vmbus_drv.c

+1 −0

Original line number	Diff line number	Diff line
		@@ -2082,6 +2082,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
		ret = device_register(&child_device_obj->device);
		if (ret) {
		pr_err("Unable to register child device\n");
		put_device(&child_device_obj->device);
		return ret;
		}

drivers/pci/controller/pci-hyperv.c

+75 −15

Original line number	Diff line number	Diff line
		@@ -1613,7 +1613,7 @@ static void hv_pci_compose_compl(void context, struct pci_response resp,
		}

		static u32 hv_compose_msi_req_v1(
		struct pci_create_interrupt int_pkt, const struct cpumask affinity,
		struct pci_create_interrupt *int_pkt,
		u32 slot, u8 vector, u16 vector_count)
		{
		int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
		@@ -1631,6 +1631,35 @@ static u32 hv_compose_msi_req_v1(
		return sizeof(*int_pkt);
		}

		/*
		* The vCPU selected by hv_compose_multi_msi_req_get_cpu() and
		* hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be
		* interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V
		* via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is
		* not irrelevant because Hyper-V chooses the physical CPU to handle the
		* interrupts based on the vCPU specified in the message sent to the vPCI VSP in
		* hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest,
		* but assigning too many vPCI device interrupts to the same pCPU can cause a
		* performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V
		* to spread out the pCPUs that it selects.
		*
		* For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu()
		* to always return the same dummy vCPU, because a second call to
		* hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a
		* new pCPU for the interrupt. But for the multi-MSI case, the second call to
		* hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the
		* original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that
		* the pCPUs are spread out. All interrupts for a multi-MSI device end up using
		* the same pCPU, even though the vCPUs will be spread out by later calls
		* to hv_irq_unmask(), but that is the best we can do now.
		*
		* With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does not
		* cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an
		* enhancement is planned for a future version. With that enhancement, the
		* dummy vCPU selection won't matter, and interrupts for the same multi-MSI
		* device will be spread across multiple pCPUs.
		*/

		/*
		* Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
		* by subsequent retarget in hv_irq_unmask().
		@@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity)
		return cpumask_first_and(affinity, cpu_online_mask);
		}

		static u32 hv_compose_msi_req_v2(
		struct pci_create_interrupt2 int_pkt, const struct cpumask affinity,
		u32 slot, u8 vector, u16 vector_count)
		/*
		* Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
		*/
		static int hv_compose_multi_msi_req_get_cpu(void)
		{
		static DEFINE_SPINLOCK(multi_msi_cpu_lock);

		/* -1 means starting with CPU 0 */
		static int cpu_next = -1;

		unsigned long flags;
		int cpu;

		spin_lock_irqsave(&multi_msi_cpu_lock, flags);

		cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
		false);
		cpu = cpu_next;

		spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);

		return cpu;
		}

		static u32 hv_compose_msi_req_v2(
		struct pci_create_interrupt2 *int_pkt, int cpu,
		u32 slot, u8 vector, u16 vector_count)
		{
		int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
		int_pkt->wslot.slot = slot;
		int_pkt->int_desc.vector = vector;
		int_pkt->int_desc.vector_count = vector_count;
		int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
		cpu = hv_compose_msi_req_get_cpu(affinity);
		int_pkt->int_desc.processor_array[0] =
		hv_cpu_number_to_vp_number(cpu);
		int_pkt->int_desc.processor_count = 1;
		@@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2(
		}

		static u32 hv_compose_msi_req_v3(
		struct pci_create_interrupt3 int_pkt, const struct cpumask affinity,
		struct pci_create_interrupt3 *int_pkt, int cpu,
		u32 slot, u32 vector, u16 vector_count)
		{
		int cpu;

		int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3;
		int_pkt->wslot.slot = slot;
		int_pkt->int_desc.vector = vector;
		int_pkt->int_desc.reserved = 0;
		int_pkt->int_desc.vector_count = vector_count;
		int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
		cpu = hv_compose_msi_req_get_cpu(affinity);
		int_pkt->int_desc.processor_array[0] =
		hv_cpu_number_to_vp_number(cpu);
		int_pkt->int_desc.processor_count = 1;
		@@ -1715,12 +1762,18 @@ static void hv_compose_msi_msg(struct irq_data data, struct msi_msg msg)
		struct pci_create_interrupt3 v3;
		} int_pkts;
		} __packed ctxt;
		bool multi_msi;
		u64 trans_id;
		u32 size;
		int ret;
		int cpu;

		msi_desc = irq_data_get_msi_desc(data);
		multi_msi = !msi_desc->pci.msi_attrib.is_msix &&
		msi_desc->nvec_used > 1;

		/* Reuse the previous allocation */
		if (data->chip_data) {
		if (data->chip_data && multi_msi) {
		int_desc = data->chip_data;
		msg->address_hi = int_desc->address >> 32;
		msg->address_lo = int_desc->address & 0xffffffff;
		@@ -1728,7 +1781,6 @@ static void hv_compose_msi_msg(struct irq_data data, struct msi_msg msg)
		return;
		}

		msi_desc = irq_data_get_msi_desc(data);
		pdev = msi_desc_to_pci_dev(msi_desc);
		dest = irq_data_get_effective_affinity_mask(data);
		pbus = pdev->bus;
		@@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data data, struct msi_msg msg)
		if (!hpdev)
		goto return_null_message;

		/* Free any previous message that might have already been composed. */
		if (data->chip_data && !multi_msi) {
		int_desc = data->chip_data;
		data->chip_data = NULL;
		hv_int_desc_free(hpdev, int_desc);
		}

		int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
		if (!int_desc)
		goto drop_reference;

		if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) {
		if (multi_msi) {
		/*
		* If this is not the first MSI of Multi MSI, we already have
		* a mapping. Can exit early.
		@@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data data, struct msi_msg msg)
		*/
		vector = 32;
		vector_count = msi_desc->nvec_used;
		cpu = hv_compose_multi_msi_req_get_cpu();
		} else {
		vector = hv_msi_get_int_vector(data);
		vector_count = 1;
		cpu = hv_compose_msi_req_get_cpu(dest);
		}

		/*
		@@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data data, struct msi_msg msg)
		switch (hbus->protocol_version) {
		case PCI_PROTOCOL_VERSION_1_1:
		size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
		dest,
		hpdev->desc.win_slot.slot,
		(u8)vector,
		vector_count);
		@@ -1794,7 +1854,7 @@ static void hv_compose_msi_msg(struct irq_data data, struct msi_msg msg)
		case PCI_PROTOCOL_VERSION_1_2:
		case PCI_PROTOCOL_VERSION_1_3:
		size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
		dest,
		cpu,
		hpdev->desc.win_slot.slot,
		(u8)vector,
		vector_count);
		@@ -1802,7 +1862,7 @@ static void hv_compose_msi_msg(struct irq_data data, struct msi_msg msg)

		case PCI_PROTOCOL_VERSION_1_4:
		size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
		dest,
		cpu,
		hpdev->desc.win_slot.slot,
		vector,
		vector_count);

drivers/scsi/storvsc_drv.c

+34 −35

Original line number	Diff line number	Diff line
		@@ -303,16 +303,21 @@ enum storvsc_request_type {
		};

		/*
		* SRB status codes and masks; a subset of the codes used here.
		* SRB status codes and masks. In the 8-bit field, the two high order bits
		* are flags, while the remaining 6 bits are an integer status code. The
		* definitions here include only the subset of the integer status codes that
		* are tested for in this driver.
		*/

		#define SRB_STATUS_AUTOSENSE_VALID 0x80
		#define SRB_STATUS_QUEUE_FROZEN 0x40
		#define SRB_STATUS_INVALID_LUN 0x20

		/* SRB status integer codes */
		#define SRB_STATUS_SUCCESS 0x01
		#define SRB_STATUS_ABORTED 0x02
		#define SRB_STATUS_ERROR 0x04
		#define SRB_STATUS_INVALID_REQUEST 0x06
		#define SRB_STATUS_DATA_OVERRUN 0x12
		#define SRB_STATUS_INVALID_LUN 0x20

		/*
		 * Strip the two flag bits (AUTOSENSE_VALID, QUEUE_FROZEN) from an 8-bit
		 * SRB status, leaving the integer status code. The argument is
		 * parenthesized so that a compound expression such as (a | b) is masked
		 * as a whole rather than only its last operand.
		 */
		#define SRB_STATUS(status) \
			((status) & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
		@@ -969,38 +974,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
		void (process_err_fn)(struct work_struct work);
		struct hv_host_device *host_dev = shost_priv(host);

		/*
		* In some situations, Hyper-V sets multiple bits in the
		* srb_status, such as ABORTED and ERROR. So process them
		* individually, with the most specific bits first.
		*/

		if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) {
		set_host_byte(scmnd, DID_NO_CONNECT);
		process_err_fn = storvsc_remove_lun;
		goto do_work;
		}

		if (vm_srb->srb_status & SRB_STATUS_ABORTED) {
		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
		/* Capacity data has changed */
		(asc == 0x2a) && (ascq == 0x9)) {
		switch (SRB_STATUS(vm_srb->srb_status)) {
		case SRB_STATUS_ERROR:
		case SRB_STATUS_ABORTED:
		case SRB_STATUS_INVALID_REQUEST:
		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) {
		/* Check for capacity change */
		if ((asc == 0x2a) && (ascq == 0x9)) {
		process_err_fn = storvsc_device_scan;
		/*
		* Retry the I/O that triggered this.
		*/
		/* Retry the I/O that triggered this. */
		set_host_byte(scmnd, DID_REQUEUE);
		goto do_work;
		}
		}

		if (vm_srb->srb_status & SRB_STATUS_ERROR) {
		/*
		* Let upper layer deal with error when
		* sense message is present.
		* Otherwise, let upper layer deal with the
		* error when sense message is present
		*/
		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
		return;
		}

		/*
		* If there is an error; offline the device since all
		@@ -1023,6 +1015,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
		default:
		set_host_byte(scmnd, DID_ERROR);
		}
		return;

		case SRB_STATUS_INVALID_LUN:
		set_host_byte(scmnd, DID_NO_CONNECT);
		process_err_fn = storvsc_remove_lun;
		goto do_work;

		}
		return;