Loading drivers/edac/ghes_edac.c +193 −130 Original line number Diff line number Diff line Loading @@ -15,9 +15,7 @@ #include "edac_module.h" #include <ras/ras_event.h> struct ghes_edac_pvt { struct list_head list; struct ghes *ghes; struct ghes_pvt { struct mem_ctl_info *mci; /* Buffers for the error handling routine */ Loading @@ -32,7 +30,16 @@ static refcount_t ghes_refcount = REFCOUNT_INIT(0); * also provides the necessary (implicit) memory barrier for the SMP * case to make the pointer visible on another CPU. */ static struct ghes_edac_pvt *ghes_pvt; static struct ghes_pvt *ghes_pvt; /* * This driver's representation of the system hardware, as collected * from DMI. */ struct ghes_hw_desc { int num_dimms; struct dimm_info *dimms; } ghes_hw; /* GHES registration mutex */ static DEFINE_MUTEX(ghes_reg_mutex); Loading Loading @@ -74,44 +81,35 @@ struct memdev_dmi_entry { u16 conf_mem_clk_speed; } __attribute__((__packed__)); struct ghes_edac_dimm_fill { struct mem_ctl_info *mci; unsigned int count; }; static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) { int *num_dimm = arg; if (dh->type == DMI_ENTRY_MEM_DEVICE) (*num_dimm)++; } static int get_dimm_smbios_index(struct mem_ctl_info *mci, u16 handle) static struct dimm_info *find_dimm_by_handle(struct mem_ctl_info *mci, u16 handle) { struct dimm_info *dimm; mci_for_each_dimm(mci, dimm) { if (dimm->smbios_handle == handle) return dimm->idx; return dimm; } return -1; return NULL; } static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) static void dimm_setup_label(struct dimm_info *dimm, u16 handle) { struct ghes_edac_dimm_fill *dimm_fill = arg; struct mem_ctl_info *mci = dimm_fill->mci; const char *bank = NULL, *device = NULL; if (dh->type == DMI_ENTRY_MEM_DEVICE) { struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; struct dimm_info *dimm = edac_get_dimm(mci, dimm_fill->count, 0, 0); dmi_memdev_name(handle, &bank, &device); /* both strings must be non-zero */ if (bank && *bank && device && *device) snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device); } static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry) { u16 rdr_mask = BIT(7) | BIT(13); if (entry->size == 0xffff) { pr_info("Can't get DIMM%i size\n", dimm_fill->count); pr_info("Can't get DIMM%i size\n", dimm->idx); dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ } else if (entry->size == 0x7fff) { dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); Loading Loading @@ -179,13 +177,11 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) dimm->dtype = DEV_UNKNOWN; dimm->grain = 128; /* Likely, worse case */ /* * FIXME: It shouldn't be hard to also fill the DIMM labels */ dimm_setup_label(dimm, entry->handle); if (dimm->nr_pages) { edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", dimm_fill->count, edac_mem_types[dimm->mtype], dimm->idx, edac_mem_types[dimm->mtype], PAGES_TO_MiB(dimm->nr_pages), (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", Loading @@ -194,16 +190,56 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) } dimm->smbios_handle = entry->handle; } dimm_fill->count++; static void enumerate_dimms(const struct dmi_header *dh, void *arg) { struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; struct ghes_hw_desc *hw = (struct ghes_hw_desc *)arg; struct dimm_info *d; if (dh->type != DMI_ENTRY_MEM_DEVICE) return; /* Enlarge the array with additional 16 */ if (!hw->num_dimms || !(hw->num_dimms % 16)) { struct dimm_info *new; new = krealloc(hw->dimms, (hw->num_dimms + 16) * sizeof(struct dimm_info), GFP_KERNEL); if (!new) { WARN_ON_ONCE(1); return; } hw->dimms = new; } d = &hw->dimms[hw->num_dimms]; d->idx = hw->num_dimms; assign_dmi_dimm_info(d, entry); hw->num_dimms++; } static void ghes_scan_system(void) { static bool scanned; if (scanned) return; dmi_walk(enumerate_dimms, &ghes_hw); scanned = true; } void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) { struct edac_raw_error_desc *e; struct mem_ctl_info *mci; struct ghes_edac_pvt *pvt; struct ghes_pvt *pvt; unsigned long flags; char *p; Loading @@ -228,7 +264,6 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) memset(e, 0, sizeof (*e)); e->error_count = 1; e->grain = 1; strcpy(e->label, "unknown label"); e->msg = pvt->msg; e->other_detail = pvt->other_detail; e->top_layer = -1; Loading Loading @@ -345,7 +380,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { const char *bank = NULL, *device = NULL; int index = -1; struct dimm_info *dimm; dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device); if (bank != NULL && device != NULL) Loading @@ -354,13 +389,18 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) p += sprintf(p, "DIMM DMI handle: 0x%.4x ", mem_err->mem_dev_handle); index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle); if (index >= 0) e->top_layer = index; dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle); if (dimm) { e->top_layer = dimm->idx; strcpy(e->label, dimm->label); } } if (p > e->location) *(p - 1) = '\0'; if (!*e->label) strcpy(e->label, "unknown memory"); /* All other fields are mapped on e->other_detail */ p = pvt->other_detail; p += snprintf(p, sizeof(pvt->other_detail), Loading Loading @@ -455,13 +495,12 @@ static struct acpi_platform_list plat_list[] = { int ghes_edac_register(struct ghes *ghes, struct device *dev) { bool fake = false; int rc = 0, num_dimm = 0; struct mem_ctl_info *mci; struct ghes_edac_pvt *pvt; struct ghes_pvt *pvt; struct edac_mc_layer layers[1]; struct ghes_edac_dimm_fill dimm_fill; unsigned long flags; int idx = -1; int rc = 0; if (IS_ENABLED(CONFIG_X86)) { /* Check if safe to enable on this system */ Loading @@ -481,20 +520,19 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) if (refcount_inc_not_zero(&ghes_refcount)) goto unlock; /* Get the number of DIMMs */ dmi_walk(ghes_edac_count_dimms, &num_dimm); ghes_scan_system(); /* Check if we've got a bogus BIOS */ if (num_dimm == 0) { if (!ghes_hw.num_dimms) { fake = true; num_dimm = 1; ghes_hw.num_dimms = 1; } layers[0].type = EDAC_MC_LAYER_ALL_MEM; layers[0].size = num_dimm; layers[0].size = ghes_hw.num_dimms; layers[0].is_virt_csrow = true; mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt)); mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_pvt)); if (!mci) { pr_info("Can't allocate memory for EDAC data\n"); rc = -ENOMEM; Loading @@ -502,7 +540,6 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) } pvt = mci->pvt_info; pvt->ghes = ghes; pvt->mci = mci; mci->pdev = dev; Loading @@ -523,13 +560,34 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); pr_info("If you find incorrect reports, please contact your hardware vendor\n"); pr_info("to correct its BIOS.\n"); pr_info("This system has %d DIMM sockets.\n", num_dimm); pr_info("This system has %d DIMM sockets.\n", ghes_hw.num_dimms); } if (!fake) { dimm_fill.count = 0; dimm_fill.mci = mci; dmi_walk(ghes_edac_dmidecode, &dimm_fill); struct dimm_info *src, *dst; int i = 0; mci_for_each_dimm(mci, dst) { src = &ghes_hw.dimms[i]; dst->idx = src->idx; dst->smbios_handle = src->smbios_handle; dst->nr_pages = src->nr_pages; dst->mtype = src->mtype; dst->edac_mode = src->edac_mode; dst->dtype = src->dtype; dst->grain = src->grain; /* * If no src->label, preserve default label assigned * from EDAC core. */ if (strlen(src->label)) memcpy(dst->label, src->label, sizeof(src->label)); i++; } } else { struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0); Loading @@ -542,7 +600,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) rc = edac_mc_add_mc(mci); if (rc < 0) { pr_info("Can't register at EDAC core\n"); pr_info("Can't register with the EDAC core\n"); edac_mc_free(mci); rc = -ENODEV; goto unlock; Loading @@ -556,6 +614,11 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) refcount_set(&ghes_refcount, 1); unlock: /* Not needed anymore */ kfree(ghes_hw.dimms); ghes_hw.dimms = NULL; mutex_unlock(&ghes_reg_mutex); return rc; Loading include/linux/edac.h +7 −22 Original line number Diff line number Diff line Loading @@ -594,27 +594,6 @@ struct mem_ctl_info { ? (mci)->dimms[(dimm)->idx + 1] \ : NULL) /** * edac_get_dimm_by_index - Get DIMM info at @index from a memory * controller * * @mci: MC descriptor struct mem_ctl_info * @index: index in the memory controller's DIMM array * * Returns a struct dimm_info * or NULL on failure. */ static inline struct dimm_info * edac_get_dimm_by_index(struct mem_ctl_info *mci, int index) { if (index < 0 || index >= mci->tot_dimms) return NULL; if (WARN_ON_ONCE(mci->dimms[index]->idx != index)) return NULL; return mci->dimms[index]; } /** * edac_get_dimm - Get DIMM info from a memory controller given by * [layer0,layer1,layer2] position Loading Loading @@ -650,6 +629,12 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, if (mci->n_layers > 2) index = index * mci->layers[2].size + layer2; return edac_get_dimm_by_index(mci, index); if (index < 0 || index >= mci->tot_dimms) return NULL; if (WARN_ON_ONCE(mci->dimms[index]->idx != index)) return NULL; return mci->dimms[index]; } #endif /* _LINUX_EDAC_H_ */ Loading
drivers/edac/ghes_edac.c +193 −130 Original line number Diff line number Diff line Loading @@ -15,9 +15,7 @@ #include "edac_module.h" #include <ras/ras_event.h> struct ghes_edac_pvt { struct list_head list; struct ghes *ghes; struct ghes_pvt { struct mem_ctl_info *mci; /* Buffers for the error handling routine */ Loading @@ -32,7 +30,16 @@ static refcount_t ghes_refcount = REFCOUNT_INIT(0); * also provides the necessary (implicit) memory barrier for the SMP * case to make the pointer visible on another CPU. */ static struct ghes_edac_pvt *ghes_pvt; static struct ghes_pvt *ghes_pvt; /* * This driver's representation of the system hardware, as collected * from DMI. */ struct ghes_hw_desc { int num_dimms; struct dimm_info *dimms; } ghes_hw; /* GHES registration mutex */ static DEFINE_MUTEX(ghes_reg_mutex); Loading Loading @@ -74,44 +81,35 @@ struct memdev_dmi_entry { u16 conf_mem_clk_speed; } __attribute__((__packed__)); struct ghes_edac_dimm_fill { struct mem_ctl_info *mci; unsigned int count; }; static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) { int *num_dimm = arg; if (dh->type == DMI_ENTRY_MEM_DEVICE) (*num_dimm)++; } static int get_dimm_smbios_index(struct mem_ctl_info *mci, u16 handle) static struct dimm_info *find_dimm_by_handle(struct mem_ctl_info *mci, u16 handle) { struct dimm_info *dimm; mci_for_each_dimm(mci, dimm) { if (dimm->smbios_handle == handle) return dimm->idx; return dimm; } return -1; return NULL; } static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) static void dimm_setup_label(struct dimm_info *dimm, u16 handle) { struct ghes_edac_dimm_fill *dimm_fill = arg; struct mem_ctl_info *mci = dimm_fill->mci; const char *bank = NULL, *device = NULL; if (dh->type == DMI_ENTRY_MEM_DEVICE) { struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; struct dimm_info *dimm = edac_get_dimm(mci, dimm_fill->count, 0, 0); dmi_memdev_name(handle, &bank, &device); /* both strings must be non-zero */ if (bank && *bank && device && *device) snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device); } static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry) { u16 rdr_mask = BIT(7) | BIT(13); if (entry->size == 0xffff) { pr_info("Can't get DIMM%i size\n", dimm_fill->count); pr_info("Can't get DIMM%i size\n", dimm->idx); dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ } else if (entry->size == 0x7fff) { dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); Loading Loading @@ -179,13 +177,11 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) dimm->dtype = DEV_UNKNOWN; dimm->grain = 128; /* Likely, worse case */ /* * FIXME: It shouldn't be hard to also fill the DIMM labels */ dimm_setup_label(dimm, entry->handle); if (dimm->nr_pages) { edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", dimm_fill->count, edac_mem_types[dimm->mtype], dimm->idx, edac_mem_types[dimm->mtype], PAGES_TO_MiB(dimm->nr_pages), (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", Loading @@ -194,16 +190,56 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) } dimm->smbios_handle = entry->handle; } dimm_fill->count++; static void enumerate_dimms(const struct dmi_header *dh, void *arg) { struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; struct ghes_hw_desc *hw = (struct ghes_hw_desc *)arg; struct dimm_info *d; if (dh->type != DMI_ENTRY_MEM_DEVICE) return; /* Enlarge the array with additional 16 */ if (!hw->num_dimms || !(hw->num_dimms % 16)) { struct dimm_info *new; new = krealloc(hw->dimms, (hw->num_dimms + 16) * sizeof(struct dimm_info), GFP_KERNEL); if (!new) { WARN_ON_ONCE(1); return; } hw->dimms = new; } d = &hw->dimms[hw->num_dimms]; d->idx = hw->num_dimms; assign_dmi_dimm_info(d, entry); hw->num_dimms++; } static void ghes_scan_system(void) { static bool scanned; if (scanned) return; dmi_walk(enumerate_dimms, &ghes_hw); scanned = true; } void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) { struct edac_raw_error_desc *e; struct mem_ctl_info *mci; struct ghes_edac_pvt *pvt; struct ghes_pvt *pvt; unsigned long flags; char *p; Loading @@ -228,7 +264,6 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) memset(e, 0, sizeof (*e)); e->error_count = 1; e->grain = 1; strcpy(e->label, "unknown label"); e->msg = pvt->msg; e->other_detail = pvt->other_detail; e->top_layer = -1; Loading Loading @@ -345,7 +380,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { const char *bank = NULL, *device = NULL; int index = -1; struct dimm_info *dimm; dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device); if (bank != NULL && device != NULL) Loading @@ -354,13 +389,18 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) p += sprintf(p, "DIMM DMI handle: 0x%.4x ", mem_err->mem_dev_handle); index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle); if (index >= 0) e->top_layer = index; dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle); if (dimm) { e->top_layer = dimm->idx; strcpy(e->label, dimm->label); } } if (p > e->location) *(p - 1) = '\0'; if (!*e->label) strcpy(e->label, "unknown memory"); /* All other fields are mapped on e->other_detail */ p = pvt->other_detail; p += snprintf(p, sizeof(pvt->other_detail), Loading Loading @@ -455,13 +495,12 @@ static struct acpi_platform_list plat_list[] = { int ghes_edac_register(struct ghes *ghes, struct device *dev) { bool fake = false; int rc = 0, num_dimm = 0; struct mem_ctl_info *mci; struct ghes_edac_pvt *pvt; struct ghes_pvt *pvt; struct edac_mc_layer layers[1]; struct ghes_edac_dimm_fill dimm_fill; unsigned long flags; int idx = -1; int rc = 0; if (IS_ENABLED(CONFIG_X86)) { /* Check if safe to enable on this system */ Loading @@ -481,20 +520,19 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) if (refcount_inc_not_zero(&ghes_refcount)) goto unlock; /* Get the number of DIMMs */ dmi_walk(ghes_edac_count_dimms, &num_dimm); ghes_scan_system(); /* Check if we've got a bogus BIOS */ if (num_dimm == 0) { if (!ghes_hw.num_dimms) { fake = true; num_dimm = 1; ghes_hw.num_dimms = 1; } layers[0].type = EDAC_MC_LAYER_ALL_MEM; layers[0].size = num_dimm; layers[0].size = ghes_hw.num_dimms; layers[0].is_virt_csrow = true; mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt)); mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_pvt)); if (!mci) { pr_info("Can't allocate memory for EDAC data\n"); rc = -ENOMEM; Loading @@ -502,7 +540,6 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) } pvt = mci->pvt_info; pvt->ghes = ghes; pvt->mci = mci; mci->pdev = dev; Loading @@ -523,13 +560,34 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); pr_info("If you find incorrect reports, please contact your hardware vendor\n"); pr_info("to correct its BIOS.\n"); pr_info("This system has %d DIMM sockets.\n", num_dimm); pr_info("This system has %d DIMM sockets.\n", ghes_hw.num_dimms); } if (!fake) { dimm_fill.count = 0; dimm_fill.mci = mci; dmi_walk(ghes_edac_dmidecode, &dimm_fill); struct dimm_info *src, *dst; int i = 0; mci_for_each_dimm(mci, dst) { src = &ghes_hw.dimms[i]; dst->idx = src->idx; dst->smbios_handle = src->smbios_handle; dst->nr_pages = src->nr_pages; dst->mtype = src->mtype; dst->edac_mode = src->edac_mode; dst->dtype = src->dtype; dst->grain = src->grain; /* * If no src->label, preserve default label assigned * from EDAC core. */ if (strlen(src->label)) memcpy(dst->label, src->label, sizeof(src->label)); i++; } } else { struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0); Loading @@ -542,7 +600,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) rc = edac_mc_add_mc(mci); if (rc < 0) { pr_info("Can't register at EDAC core\n"); pr_info("Can't register with the EDAC core\n"); edac_mc_free(mci); rc = -ENODEV; goto unlock; Loading @@ -556,6 +614,11 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) refcount_set(&ghes_refcount, 1); unlock: /* Not needed anymore */ kfree(ghes_hw.dimms); ghes_hw.dimms = NULL; mutex_unlock(&ghes_reg_mutex); return rc; Loading
include/linux/edac.h +7 −22 Original line number Diff line number Diff line Loading @@ -594,27 +594,6 @@ struct mem_ctl_info { ? (mci)->dimms[(dimm)->idx + 1] \ : NULL) /** * edac_get_dimm_by_index - Get DIMM info at @index from a memory * controller * * @mci: MC descriptor struct mem_ctl_info * @index: index in the memory controller's DIMM array * * Returns a struct dimm_info * or NULL on failure. */ static inline struct dimm_info * edac_get_dimm_by_index(struct mem_ctl_info *mci, int index) { if (index < 0 || index >= mci->tot_dimms) return NULL; if (WARN_ON_ONCE(mci->dimms[index]->idx != index)) return NULL; return mci->dimms[index]; } /** * edac_get_dimm - Get DIMM info from a memory controller given by * [layer0,layer1,layer2] position Loading Loading @@ -650,6 +629,12 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, if (mci->n_layers > 2) index = index * mci->layers[2].size + layer2; return edac_get_dimm_by_index(mci, index); if (index < 0 || index >= mci->tot_dimms) return NULL; if (WARN_ON_ONCE(mci->dimms[index]->idx != index)) return NULL; return mci->dimms[index]; } #endif /* _LINUX_EDAC_H_ */