Commit af984c87 authored by James Smart; committed by Martin K. Petersen
Browse files

scsi: lpfc: Allow fabric node recovery if recovery is in progress before devloss

A link bounce to a slow fabric may observe FDISC response delays lasting
longer than devloss tmo.  Current logic decrements the final fabric node
kref during a devloss tmo event.  This results in a NULL ptr dereference
crash if the FDISC completes for that fabric node after devloss tmo.

Fix by adding the NLP_IN_RECOV_POST_DEV_LOSS flag, which is set when
devloss tmo triggers and we've noticed that fabric node recovery has
already started or finished in the window between lpfc_dev_loss_tmo_callbk
queuing lpfc_dev_loss_tmo_handler and the handler actually running.  If
fabric node recovery succeeds, then
the driver reverses the devloss tmo marked kref put with a kref get.  If
fabric node recovery fails, then the final kref put relies on the ELS
timing out or the REG_LOGIN cmpl routine.

Link: https://lore.kernel.org/r/20211020211417.88754-8-jsmart2021@gmail.com


Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 1854f53c
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -119,6 +119,8 @@ int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *,
struct lpfc_nodelist *lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did);
struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *);
int  lpfc_nlp_put(struct lpfc_nodelist *);
void lpfc_check_nlp_post_devloss(struct lpfc_vport *vport,
				 struct lpfc_nodelist *ndlp);
void lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
			  struct lpfc_iocbq *rspiocb);
int  lpfc_nlp_not_used(struct lpfc_nodelist *ndlp);
+10 −2
Original line number Diff line number Diff line
@@ -85,6 +85,13 @@ enum lpfc_fc4_xpt_flags {
	NLP_XPT_HAS_HH		= 0x10
};

enum lpfc_nlp_save_flags {
	/* devloss occurred during recovery */
	NLP_IN_RECOV_POST_DEV_LOSS	= 0x1,
	/* wait for outstanding LOGO to cmpl */
	NLP_WAIT_FOR_LOGO		= 0x2,
};

struct lpfc_nodelist {
	struct list_head nlp_listp;
	struct serv_parm fc_sparam;		/* buffer for service params */
@@ -144,8 +151,9 @@ struct lpfc_nodelist {
	unsigned long *active_rrqs_xri_bitmap;
	struct lpfc_scsicmd_bkt *lat_data;	/* Latency data */
	uint32_t fc4_prli_sent;
	u32 upcall_flags;
#define	NLP_WAIT_FOR_LOGO 0x2

	/* flags to keep ndlp alive until special conditions are met */
	enum lpfc_nlp_save_flags save_flags;

	enum lpfc_fc4_xpt_flags fc4_xpt_flags;

+5 −2
Original line number Diff line number Diff line
@@ -2905,9 +2905,9 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
	irsp = &(rspiocb->iocb);
	spin_lock_irq(&ndlp->lock);
	ndlp->nlp_flag &= ~NLP_LOGO_SND;
	if (ndlp->upcall_flags & NLP_WAIT_FOR_LOGO) {
	if (ndlp->save_flags & NLP_WAIT_FOR_LOGO) {
		wake_up_waiter = 1;
		ndlp->upcall_flags &= ~NLP_WAIT_FOR_LOGO;
		ndlp->save_flags &= ~NLP_WAIT_FOR_LOGO;
	}
	spin_unlock_irq(&ndlp->lock);

@@ -10735,6 +10735,9 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
				 irsp->ulpStatus, irsp->un.ulpWord[4]);
		goto fdisc_failed;
	}

	lpfc_check_nlp_post_devloss(vport, ndlp);

	spin_lock_irq(shost->host_lock);
	vport->fc_flag &= ~FC_VPORT_CVL_RCVD;
	vport->fc_flag &= ~FC_VPORT_LOGO_RCVD;
+109 −2
Original line number Diff line number Diff line
@@ -209,6 +209,11 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)

	spin_lock_irqsave(&ndlp->lock, iflags);
	ndlp->nlp_flag |= NLP_IN_DEV_LOSS;

	/* If there is a PLOGI in progress, and we are in a
	 * NLP_NPR_2B_DISC state, don't turn off the flag.
	 */
	if (ndlp->nlp_state != NLP_STE_PLOGI_ISSUE)
		ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;

	/*
@@ -340,6 +345,37 @@ static void lpfc_check_inactive_vmid(struct lpfc_hba *phba)
	lpfc_destroy_vport_work_array(phba, vports);
}

/**
 * lpfc_check_nlp_post_devloss - Check to restore ndlp refcnt after devloss
 * @vport: Pointer to vport object.
 * @ndlp: Pointer to remote node object.
 *
 * If NLP_IN_RECOV_POST_DEV_LOSS flag was set due to outstanding recovery of
 * node during dev_loss_tmo processing, then this function restores the nlp_put
 * kref decrement from lpfc_dev_loss_tmo_handler.
 *
 * Called from recovery completion paths (e.g. FDISC cmpl, REG_LOGIN cmpl,
 * node registration) once the fabric node is known to have survived.
 * Takes and releases ndlp->lock internally; must not be called with it held.
 **/
void
lpfc_check_nlp_post_devloss(struct lpfc_vport *vport,
			    struct lpfc_nodelist *ndlp)
{
	unsigned long iflags;

	spin_lock_irqsave(&ndlp->lock, iflags);
	if (ndlp->save_flags & NLP_IN_RECOV_POST_DEV_LOSS) {
		/* Clear the marker while still holding the lock so a
		 * concurrent caller cannot reverse the same devloss tmo
		 * kref put a second time.
		 */
		ndlp->save_flags &= ~NLP_IN_RECOV_POST_DEV_LOSS;
		spin_unlock_irqrestore(&ndlp->lock, iflags);
		/* Reverse the lpfc_nlp_put issued by
		 * lpfc_dev_loss_tmo_handler; done outside ndlp->lock since
		 * lpfc_nlp_get takes its own internal locking.
		 */
		lpfc_nlp_get(ndlp);
		lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_NODE,
				 "8438 Devloss timeout reversed on DID x%x "
				 "refcnt %d ndlp %p flag x%x "
				 "port_state = x%x\n",
				 ndlp->nlp_DID, kref_read(&ndlp->kref), ndlp,
				 ndlp->nlp_flag, vport->port_state);
		/* Re-acquire so the single unlock below serves both paths. */
		spin_lock_irqsave(&ndlp->lock, iflags);
	}
	spin_unlock_irqrestore(&ndlp->lock, iflags);
}

/**
 * lpfc_dev_loss_tmo_handler - Remote node devloss timeout handler
 * @ndlp: Pointer to remote node object.
@@ -358,6 +394,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
	uint8_t *name;
	int warn_on = 0;
	int fcf_inuse = 0;
	bool recovering = false;
	struct fc_vport *fc_vport = NULL;
	unsigned long iflags;

	vport = ndlp->vport;
@@ -394,6 +432,64 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)

	/* Fabric nodes are done. */
	if (ndlp->nlp_type & NLP_FABRIC) {
		spin_lock_irqsave(&ndlp->lock, iflags);
		/* In massive vport configuration settings, it's possible
		 * dev_loss_tmo fired during node recovery.  So, check if
		 * fabric nodes are in discovery states outstanding.
		 */
		switch (ndlp->nlp_DID) {
		case Fabric_DID:
			fc_vport = vport->fc_vport;
			if (fc_vport &&
			    fc_vport->vport_state == FC_VPORT_INITIALIZING)
				recovering = true;
			break;
		case Fabric_Cntl_DID:
			if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND)
				recovering = true;
			break;
		case FDMI_DID:
			fallthrough;
		case NameServer_DID:
			if (ndlp->nlp_state >= NLP_STE_PLOGI_ISSUE &&
			    ndlp->nlp_state <= NLP_STE_REG_LOGIN_ISSUE)
				recovering = true;
			break;
		}
		spin_unlock_irqrestore(&ndlp->lock, iflags);

		/* Mark an NLP_IN_RECOV_POST_DEV_LOSS flag to know if reversing
		 * the following lpfc_nlp_put is necessary after fabric node is
		 * recovered.
		 */
		if (recovering) {
			lpfc_printf_vlog(vport, KERN_INFO,
					 LOG_DISCOVERY | LOG_NODE,
					 "8436 Devloss timeout marked on "
					 "DID x%x refcnt %d ndlp %p "
					 "flag x%x port_state = x%x\n",
					 ndlp->nlp_DID, kref_read(&ndlp->kref),
					 ndlp, ndlp->nlp_flag,
					 vport->port_state);
			spin_lock_irqsave(&ndlp->lock, iflags);
			ndlp->save_flags |= NLP_IN_RECOV_POST_DEV_LOSS;
			spin_unlock_irqrestore(&ndlp->lock, iflags);
		} else if (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
			/* Fabric node fully recovered before this dev_loss_tmo
			 * queue work is processed.  Thus, ignore the
			 * dev_loss_tmo event.
			 */
			lpfc_printf_vlog(vport, KERN_INFO,
					 LOG_DISCOVERY | LOG_NODE,
					 "8437 Devloss timeout ignored on "
					 "DID x%x refcnt %d ndlp %p "
					 "flag x%x port_state = x%x\n",
					 ndlp->nlp_DID, kref_read(&ndlp->kref),
					 ndlp, ndlp->nlp_flag,
					 vport->port_state);
			return fcf_inuse;
		}

		lpfc_nlp_put(ndlp);
		return fcf_inuse;
	}
@@ -423,6 +519,14 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
				 ndlp->nlp_state, ndlp->nlp_rpi);
	}

	/* If we are devloss, but we are in the process of rediscovering the
	 * ndlp, don't issue a NLP_EVT_DEVICE_RM event.
	 */
	if (ndlp->nlp_state >= NLP_STE_PLOGI_ISSUE &&
	    ndlp->nlp_state <= NLP_STE_PRLI_ISSUE) {
		return fcf_inuse;
	}

	if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD))
		lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);

@@ -4363,6 +4467,8 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
		goto out;
	}

	lpfc_check_nlp_post_devloss(vport, ndlp);

	if (phba->sli_rev < LPFC_SLI_REV4)
		ndlp->nlp_rpi = mb->un.varWords[0];

@@ -4540,9 +4646,10 @@ lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
void
lpfc_nlp_reg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
{

	unsigned long iflags;

	lpfc_check_nlp_post_devloss(vport, ndlp);

	spin_lock_irqsave(&ndlp->lock, iflags);
	if (ndlp->fc4_xpt_flags & NLP_XPT_REGD) {
		/* Already registered with backend, trigger rescan */
+8 −4
Original line number Diff line number Diff line
@@ -3753,12 +3753,16 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
					lpfc_disc_state_machine(vports[i], ndlp,
						NULL, NLP_EVT_DEVICE_RECOVERY);

					/* Don't remove the node unless the
					/* Don't remove the node unless the node
					 * has been unregistered with the
					 * transport.  If so, let dev_loss
					 * take care of the node.
					 */
					if (!(ndlp->fc4_xpt_flags &
					 * transport, and we're not in recovery
					 * before dev_loss_tmo triggered.
					 * Otherwise, let dev_loss take care of
					 * the node.
					 */
					if (!(ndlp->save_flags &
					      NLP_IN_RECOV_POST_DEV_LOSS) &&
					    !(ndlp->fc4_xpt_flags &
					      (NVME_XPT_REGD | SCSI_XPT_REGD)))
						lpfc_disc_state_machine
							(vports[i], ndlp,
Loading