Commit 2ab47045 authored by Dan Williams's avatar Dan Williams
Browse files

cxl/region: Flag partially torn down regions as unusable

cxl_region_decode_reset() walks all the decoders associated with a given
region and disables them. Due to decoder ordering rules it is possible
that a switch in the topology notices that a given decoder can not be
shutdown before another region with a higher HPA is shutdown first. That
can leave the region in a partially committed state.

Capture that state in a new CXL_REGION_F_NEEDS_RESET flag and require
that a successful cxl_region_decode_reset() attempt must be completed
before cxl_region_probe() accepts the region.

This is a corollary for the bug that Jonathan identified in "CXL/region
:  commit reset of out of order region appears to succeed." [1].

Cc: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
Link: http://lore.kernel.org/r/20230316171441.0000205b@Huawei.com

 [1]
Fixes: 176baefb ("cxl/hdm: Commit decoder state to hardware")
Reviewed-by: default avatarDave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/168696507423.3590522.16254212607926684429.stgit@dwillia2-xfh.jf.intel.com


Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent d1257d09
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -182,14 +182,19 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
				rc = cxld->reset(cxld);
			if (rc)
				return rc;
			set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
		}

endpoint_reset:
		rc = cxled->cxld.reset(&cxled->cxld);
		if (rc)
			return rc;
		set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
	}

	/* all decoders associated with this region have been torn down */
	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);

	return 0;
}

@@ -2864,6 +2869,13 @@ static int cxl_region_probe(struct device *dev)
		goto out;
	}

	if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
		dev_err(&cxlr->dev,
			"failed to activate, re-commit region and retry\n");
		rc = -ENXIO;
		goto out;
	}

	/*
	 * From this point on any path that changes the region's state away from
	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
+8 −0
Original line number Diff line number Diff line
@@ -469,6 +469,14 @@ struct cxl_region_params {
 */
#define CXL_REGION_F_AUTO 0

/*
 * Require that a committed region successfully complete a teardown once
 * any of its associated decoders have been torn down. This maintains
 * the commit state for the region since there are committed decoders,
 * but blocks cxl_region_probe().
 */
#define CXL_REGION_F_NEEDS_RESET 1

/**
 * struct cxl_region - CXL region
 * @dev: This region's device