Commit e9b11083 authored by James Smart's avatar James Smart Committed by Martin K. Petersen
Browse files

scsi: lpfc: Fix refcounting around SCSI and NVMe transport APIs

Due to bug history and code review, the node reference counting approach in
the driver isn't implemented consistently with how the scsi and nvme
transport perform registrations and unregistrations and their callbacks.
This resulted in many bad/stale node pointers.

Reword the driver so that reference handling is performed as follows:

 - The initial node reference is taken on structure allocation

 - Take a reference on any add/register call to the transport

 - Remove a reference on any delete/unregister call to the transport

 - After the node has fully removed from both the SCSI and NVMEe transports
   (dev_loss_callbacks have called back) call the discovery engine
   DEVICE_RM event which will remove the final reference and release the
   node structure.

 - Alter dev_loss handling when a vport or base port is unloading.

 - Remove the put_node handling - no longer needed.

 - Rewrite the vport_delete handling on reference counts.  Part of this
   effort was driven from the FDISC not registering with the transport and
   disrupting the model for node reference counting.

 - Deleted lpfc_nlp_remove.  Pushed it's remaining ops into
   lpfc_nlp_release.

 - Several other small code cleanups.

Link: https://lore.kernel.org/r/20201115192646.12977-5-james.smart@broadcom.com


Co-developed-by: default avatarDick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: default avatarDick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: default avatarJames Smart <james.smart@broadcom.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 95f0ef8a
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -131,13 +131,17 @@ struct lpfc_nodelist {
	unsigned long *active_rrqs_xri_bitmap;
	struct lpfc_scsicmd_bkt *lat_data;	/* Latency data */
	uint32_t fc4_prli_sent;
	uint32_t upcall_flags;
	uint32_t fc4_xpt_flags;
#define NLP_WAIT_FOR_UNREG    0x1
#define SCSI_XPT_REGD         0x2
#define NVME_XPT_REGD         0x4


	uint32_t nvme_fb_size; /* NVME target's supported byte cnt */
#define NVME_FB_BIT_SHIFT 9    /* PRLI Rsp first burst in 512B units. */
	uint32_t nlp_defer_did;
};

struct lpfc_node_rrq {
	struct list_head list;
	uint16_t xritag;
+29 −60
Original line number Diff line number Diff line
@@ -1554,7 +1554,7 @@ lpfc_more_plogi(struct lpfc_vport *vport)
}

/**
 * lpfc_plogi_confirm_nport - Confirm pologi wwpn matches stored ndlp
 * lpfc_plogi_confirm_nport - Confirm plogi wwpn matches stored ndlp
 * @phba: pointer to lpfc hba data structure.
 * @prsp: pointer to response IOCB payload.
 * @ndlp: pointer to a node-list data structure.
@@ -1591,8 +1591,6 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
	struct lpfc_vport *vport = ndlp->vport;
	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
	struct lpfc_nodelist *new_ndlp;
	struct lpfc_rport_data *rdata;
	struct fc_rport *rport;
	struct serv_parm *sp;
	uint8_t  name[sizeof(struct lpfc_name)];
	uint32_t rc, keepDID = 0, keep_nlp_flag = 0;
@@ -1600,7 +1598,6 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
	uint16_t keep_nlp_state;
	u32 keep_nlp_fc4_type = 0;
	struct lpfc_nvme_rport *keep_nrport = NULL;
	int  put_node;
	unsigned long *active_rrqs_xri_bitmap = NULL;

	/* Fabric nodes can have the same WWPN so we don't bother searching
@@ -1730,26 +1727,6 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
			 "3179 PLOGI confirm NEW: %x %x\n",
			 new_ndlp->nlp_DID, keepDID);

		/* Fix up the rport accordingly */
		rport =  ndlp->rport;
		if (rport) {
			rdata = rport->dd_data;
			if (rdata->pnode == ndlp) {
				/* break the link before dropping the ref */
				ndlp->rport = NULL;
				lpfc_nlp_put(ndlp);
				rdata->pnode = lpfc_nlp_get(new_ndlp);
				new_ndlp->rport = rport;
			}
			new_ndlp->nlp_type = ndlp->nlp_type;
		}

		/* Fix up the nvme rport */
		if (ndlp->nrport) {
			ndlp->nrport = NULL;
			lpfc_nlp_put(ndlp);
		}

		/* Two ndlps cannot have the same did on the nodelist.
		 * Note: for this case, ndlp has a NULL WWPN so setting
		 * the nlp_fc4_type isn't required.
@@ -1789,25 +1766,7 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
		    (ndlp->nlp_state == NLP_STE_MAPPED_NODE))
			keep_nlp_state = NLP_STE_NPR_NODE;
		lpfc_nlp_set_state(vport, ndlp, keep_nlp_state);

		/* Previous ndlp no longer active with nvme host transport.
		 * Remove reference from earlier registration unless the
		 * nvme host took care of it.
		 */
		if (ndlp->nrport)
			lpfc_nlp_put(ndlp);
		ndlp->nrport = keep_nrport;

		/* Fix up the rport accordingly */
		rport = ndlp->rport;
		if (rport) {
			rdata = rport->dd_data;
			put_node = rdata->pnode != NULL;
			rdata->pnode = NULL;
			ndlp->rport = NULL;
			if (put_node)
				lpfc_nlp_put(ndlp);
		}
	}

	/*
@@ -2027,10 +1986,12 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
				 "2753 PLOGI failure DID:%06X Status:x%x/x%x\n",
				 ndlp->nlp_DID, irsp->ulpStatus,
				 irsp->un.ulpWord[4]);
		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
		/* Do not call DSM for lpfc_els_abort'ed ELS cmds. Just execute
		 * the final node put to free it to the pool.
		 */
		if (!lpfc_error_lost_link(irsp))
			lpfc_disc_state_machine(vport, ndlp, cmdiocb,
						NLP_EVT_CMPL_PLOGI);
						NLP_EVT_DEVICE_RM);
	} else {
		/* Good status, call state machine */
		prsp = list_entry(((struct lpfc_dmabuf *)
@@ -4439,15 +4400,16 @@ lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
	mempool_free(pmb, phba->mbox_mem_pool);
	if (ndlp) {
		lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
				 "0006 rpi%x DID:%x flg:%x %d x%px\n",
				 "0006 rpi x%x DID:%x flg:%x %d x%px\n",
				 ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag,
				 kref_read(&ndlp->kref),
				 ndlp);
		lpfc_nlp_put(ndlp);
		/* This is the end of the default RPI cleanup logic for
		 * this ndlp. If no other discovery threads are using
		 * this ndlp, free all resources associated with it.
		 * this ndlp and it could get released.  Clear the nlp_flags to
		 * prevent any further processing.
		 */
		ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
		lpfc_nlp_put(ndlp);
		lpfc_nlp_not_used(ndlp);
	}

@@ -4652,7 +4614,6 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
				 * the routine lpfc_els_free_iocb.
				 */
				cmdiocb->context1 = NULL;

	}

	/* Release the originating I/O reference. */
@@ -8945,6 +8906,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
			lpfc_nlp_put(ndlp);
	}

	/* Release the reference on this elsiocb, not the ndlp. */
	lpfc_nlp_put(elsiocb->context1);
	elsiocb->context1 = NULL;

@@ -9261,9 +9223,10 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
					lpfc_start_fdiscs(phba);
				lpfc_do_scr_ns_plogi(phba, vport);
			}
		} else
		} else {
			lpfc_do_scr_ns_plogi(phba, vport);
		}
	}
mbox_err_exit:
	/* Now, we decrement the ndlp reference count held for this
	 * callback function
@@ -9536,6 +9499,7 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
		 * to update the MAC address.
		 */
		lpfc_register_new_vport(phba, vport, ndlp);
		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
		goto out;
	}

@@ -9545,7 +9509,13 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
		lpfc_register_new_vport(phba, vport, ndlp);
	else
		lpfc_do_scr_ns_plogi(phba, vport);

	/* The FDISC completed successfully. Move the fabric ndlp to
	 * UNMAPPED state and register with the transport.
	 */
	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
	goto out;

fdisc_failed:
	if (vport->fc_vport &&
	    (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS))
@@ -9697,19 +9667,14 @@ lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
		"LOGO npiv cmpl:  status:x%x/x%x did:x%x",
		irsp->ulpStatus, irsp->un.ulpWord[4], irsp->un.rcvels.remoteID);

	lpfc_nlp_put(ndlp);
	lpfc_els_free_iocb(phba, cmdiocb);
	vport->unreg_vpi_cmpl = VPORT_ERROR;

	/* Trigger the release of the ndlp after logo */
	lpfc_nlp_put(ndlp);

	/* NPIV LOGO completes to NPort <nlp_DID> */
	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
			 "2928 NPIV LOGO completes to NPort x%x "
			 "Data: x%x x%x x%x x%x\n",
			 "Data: x%x x%x x%x x%x x%x x%x x%x\n",
			 ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4],
			 irsp->ulpTimeout, vport->num_disc_nodes);
			 irsp->ulpTimeout, vport->num_disc_nodes,
			 kref_read(&ndlp->kref), ndlp->nlp_flag,
			 ndlp->fc4_xpt_flags);

	if (irsp->ulpStatus == IOSTAT_SUCCESS) {
		spin_lock_irq(shost->host_lock);
@@ -9718,7 +9683,11 @@ lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
		spin_unlock_irq(shost->host_lock);
		lpfc_can_disctmo(vport);
	}

	/* Safe to release resources now. */
	lpfc_els_free_iocb(phba, cmdiocb);
	lpfc_nlp_put(ndlp);
	vport->unreg_vpi_cmpl = VPORT_ERROR;
}

/**
+31 −18
Original line number Diff line number Diff line
@@ -2859,10 +2859,15 @@ lpfc_cleanup(struct lpfc_vport *vport)
			continue;
		}

		if (ndlp->nlp_type & NLP_FABRIC)
		/* Fabric Ports not in UNMAPPED state are cleaned up in the
		 * DEVICE_RM event.
		 */
		if (ndlp->nlp_type & NLP_FABRIC &&
		    ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
			lpfc_disc_state_machine(vport, ndlp, NULL,
					NLP_EVT_DEVICE_RECOVERY);

		if (!(ndlp->fc4_xpt_flags & (NVME_XPT_REGD|SCSI_XPT_REGD)))
			lpfc_disc_state_machine(vport, ndlp, NULL,
					NLP_EVT_DEVICE_RM);
	}
@@ -2881,9 +2886,11 @@ lpfc_cleanup(struct lpfc_vport *vport)
				lpfc_printf_vlog(ndlp->vport, KERN_ERR,
						 LOG_TRACE_EVENT,
						 "0282 did:x%x ndlp:x%px "
						"refcnt:%d\n",
						 "refcnt:%d xflags x%x nflag x%x\n",
						 ndlp->nlp_DID, (void *)ndlp,
						kref_read(&ndlp->kref));
						 kref_read(&ndlp->kref),
						 ndlp->fc4_xpt_flags,
						 ndlp->nlp_flag);
			}
			break;
		}
@@ -3499,10 +3506,10 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
				 * comes back online.
				 */
				if (phba->sli_rev == LPFC_SLI_REV4) {
					lpfc_printf_vlog(ndlp->vport, KERN_INFO,
					lpfc_printf_vlog(vports[i], KERN_INFO,
						 LOG_NODE | LOG_DISCOVERY,
						 "0011 Free RPI x%x on "
						 "ndlp:x%px did x%x\n",
						 "ndlp: %p did x%x\n",
						 ndlp->nlp_rpi, ndlp,
						 ndlp->nlp_DID);
					lpfc_sli4_free_rpi(phba, ndlp->nlp_rpi);
@@ -3513,8 +3520,18 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
				if (ndlp->nlp_type & NLP_FABRIC) {
					lpfc_disc_state_machine(vports[i], ndlp,
						NULL, NLP_EVT_DEVICE_RECOVERY);
					lpfc_disc_state_machine(vports[i], ndlp,
						NULL, NLP_EVT_DEVICE_RM);

					/* Don't remove the node unless the
					 * has been unregistered with the
					 * transport.  If so, let dev_loss
					 * take care of the node.
					 */
					if (!(ndlp->fc4_xpt_flags &
					      (NVME_XPT_REGD | SCSI_XPT_REGD)))
						lpfc_disc_state_machine
							(vports[i], ndlp,
							 NULL,
							 NLP_EVT_DEVICE_RM);
				}
			}
		}
@@ -12501,13 +12518,11 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev)

	/* Remove FC host with the physical port */
	fc_remove_host(shost);
	scsi_remove_host(shost);

	/* Clean up all nodes, mailboxes and IOs. */
	lpfc_cleanup(vport);

	/* Remove the shost now that the devices connections are lost. */
	scsi_remove_host(shost);

	/*
	 * Bring down the SLI Layer. This step disable all interrupts,
	 * clears the rings, discards all mailbox commands, and resets
@@ -13359,6 +13374,7 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev)

	/* Remove FC host with the physical port */
	fc_remove_host(shost);
	scsi_remove_host(shost);

	/* Perform ndlp cleanup on the physical port.  The nvme and nvmet
	 * localports are destroyed after to cleanup all transport memory.
@@ -13371,9 +13387,6 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev)
	if (phba->cfg_xri_rebalancing)
		lpfc_destroy_multixri_pools(phba);

	/* Remove the shost now that the devices connections are lost. */
	scsi_remove_host(shost);

	/*
	 * Bring down the SLI Layer. This step disables all interrupts,
	 * clears the rings, discards all mailbox commands, and resets
+41 −21
Original line number Diff line number Diff line
@@ -356,39 +356,47 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport)
	struct lpfc_nvme_rport *rport = remoteport->private;
	struct lpfc_vport *vport;
	struct lpfc_nodelist *ndlp;
	u32 fc4_xpt_flags;

	ndlp = rport->ndlp;
	if (!ndlp)
	if (!ndlp) {
		pr_err("**** %s: NULL ndlp on rport %p remoteport %p\n",
		       __func__, rport, remoteport);
		goto rport_err;
	}

	vport = ndlp->vport;
	if (!vport)
	if (!vport) {
		pr_err("**** %s: Null vport on ndlp %p, ste x%x rport %p\n",
		       __func__, ndlp, ndlp->nlp_state, rport);
		goto rport_err;
	}

	fc4_xpt_flags = NVME_XPT_REGD | SCSI_XPT_REGD;

	/* Remove this rport from the lport's list - memory is owned by the
	 * transport. Remove the ndlp reference for the NVME transport before
	 * calling state machine to remove the node.
	 */
	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
			"6146 remoteport delete of remoteport x%px\n",
			"6146 remoteport delete of remoteport %p\n",
			remoteport);
	spin_lock_irq(&vport->phba->hbalock);

	/* The register rebind might have occurred before the delete
	 * downcall.  Guard against this race.
	 */
	if (ndlp->upcall_flags & NLP_WAIT_FOR_UNREG) {
		ndlp->nrport = NULL;
		ndlp->upcall_flags &= ~NLP_WAIT_FOR_UNREG;
	if (ndlp->fc4_xpt_flags & NLP_WAIT_FOR_UNREG)
		ndlp->fc4_xpt_flags &= ~(NLP_WAIT_FOR_UNREG | NVME_XPT_REGD);

	spin_unlock_irq(&vport->phba->hbalock);

		/* Remove original register reference. The host transport
		 * won't reference this rport/remoteport any further.
	/* On a devloss timeout event, one more put is executed provided the
	 * NVME and SCSI rport unregister requests are complete.  If the vport
	 * is unloading, this extra put is executed by lpfc_drop_node.
	 */
		lpfc_nlp_put(ndlp);
	} else {
		spin_unlock_irq(&vport->phba->hbalock);
	}
	if (!(ndlp->fc4_xpt_flags & fc4_xpt_flags))
		lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);

 rport_err:
	return;
@@ -660,6 +668,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
				 "Data: x%x x%x  rc x%x\n",
				 ndlp->nlp_DID, genwqe->iotag,
				 vport->port_state, rc);
		lpfc_nlp_put(ndlp);
		lpfc_sli_release_iocbq(phba, genwqe);
		return 1;
	}
@@ -1687,7 +1696,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
				 "IO. State x%x, Type x%x Flg x%x\n",
				 pnvme_rport->port_id,
				 ndlp->nlp_state, ndlp->nlp_type,
				 ndlp->upcall_flags);
				 ndlp->fc4_xpt_flags);
		atomic_inc(&lport->xmt_fcp_bad_ndlp);
		ret = -EBUSY;
		goto out_fail;
@@ -2483,7 +2492,8 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
		 * race that leaves the WAIT flag set.
		 */
		spin_lock_irq(&vport->phba->hbalock);
		ndlp->upcall_flags &= ~NLP_WAIT_FOR_UNREG;
		ndlp->fc4_xpt_flags &= ~NLP_WAIT_FOR_UNREG;
		ndlp->fc4_xpt_flags |= NVME_XPT_REGD;
		spin_unlock_irq(&vport->phba->hbalock);
		rport = remote_port->private;
		if (oldrport) {
@@ -2494,7 +2504,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
			 */
			spin_lock_irq(&vport->phba->hbalock);
			ndlp->nrport = NULL;
			ndlp->upcall_flags &= ~NLP_WAIT_FOR_UNREG;
			ndlp->fc4_xpt_flags &= ~NLP_WAIT_FOR_UNREG;
			spin_unlock_irq(&vport->phba->hbalock);
			rport->ndlp = NULL;
			rport->remoteport = NULL;
@@ -2631,10 +2641,11 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)

	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
			 "6033 Unreg nvme remoteport x%px, portname x%llx, "
			 "port_id x%06x, portstate x%x port type x%x\n",
			 "port_id x%06x, portstate x%x port type x%x "
			 "refcnt %d\n",
			 remoteport, remoteport->port_name,
			 remoteport->port_id, remoteport->port_state,
			 ndlp->nlp_type);
			 ndlp->nlp_type, kref_read(&ndlp->kref));

	/* Sanity check ndlp type.  Only call for NVME ports. Don't
	 * clear any rport state until the transport calls back.
@@ -2644,7 +2655,9 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
		/* No concern about the role change on the nvme remoteport.
		 * The transport will update it.
		 */
		ndlp->upcall_flags |= NLP_WAIT_FOR_UNREG;
		spin_lock_irq(&vport->phba->hbalock);
		ndlp->fc4_xpt_flags |= NLP_WAIT_FOR_UNREG;
		spin_unlock_irq(&vport->phba->hbalock);

		/* Don't let the host nvme transport keep sending keep-alives
		 * on this remoteport. Vport is unloading, no recovery. The
@@ -2655,8 +2668,15 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
			(void)nvme_fc_set_remoteport_devloss(remoteport, 0);

		ret = nvme_fc_unregister_remoteport(remoteport);
		if (ret != 0) {

		/* The driver no longer knows if the nrport memory is valid.
		 * because the controller teardown process has begun and
		 * is asynchronous.  Break the binding in the ndlp. Also
		 * remove the register ndlp reference to setup node release.
		 */
		ndlp->nrport = NULL;
		lpfc_nlp_put(ndlp);
		if (ret != 0) {
			lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
					 "6167 NVME unregister failed %d "
					 "port_state x%x\n",
+1 −1
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@
#define LPFC_NVME_INFO_MORE_STR		"\nCould be more info...\n"

#define lpfc_ndlp_get_nrport(ndlp)					\
	((!ndlp->nrport || (ndlp->upcall_flags & NLP_WAIT_FOR_UNREG))	\
	((!ndlp->nrport || (ndlp->fc4_xpt_flags & NLP_WAIT_FOR_UNREG))	\
	? NULL : ndlp->nrport)

struct lpfc_nvme_qhandle {
Loading