Loading drivers/edac/edac_mc.c +48 −33 Original line number Diff line number Diff line Loading @@ -1065,7 +1065,6 @@ static void edac_ue_error(struct mem_ctl_info *mci, edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } #define OTHER_LABEL " or " /** * edac_mc_handle_error - reports a memory event to userspace Loading Loading @@ -1097,19 +1096,28 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { /* FIXME: too much for stack: move it to some pre-alocated area */ char detail[80], location[80]; char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; char detail[80]; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; int i; long grain; bool enable_per_layer_report = false; int i, n_labels = 0; u8 grain_bits; struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); /* Fills the error report buffer */ memset(e, 0, sizeof (*e)); e->error_count = error_count; e->top_layer = top_layer; e->mid_layer = mid_layer; e->low_layer = low_layer; e->page_frame_number = page_frame_number; e->offset_in_page = offset_in_page; e->syndrome = syndrome; e->msg = msg; e->other_detail = other_detail; /* * Check if the event report is consistent and if the memory * location is known. If it is known, enable_per_layer_report will be Loading @@ -1132,7 +1140,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) enable_per_layer_report = true; e->enable_per_layer_report = true; } /* Loading @@ -1146,8 +1154,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * where each memory belongs to a separate channel within the same * branch. */ grain = 0; p = label; p = e->label; *p = '\0'; for (i = 0; i < mci->tot_dimms; i++) { Loading @@ -1161,8 +1168,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, continue; /* get the max grain, over the error match range */ if (dimm->grain > grain) grain = dimm->grain; if (dimm->grain > e->grain) e->grain = dimm->grain; /* * If the error is memory-controller wide, there's no need to Loading @@ -1170,8 +1177,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ if (enable_per_layer_report && dimm->nr_pages) { if (p != label) { if (e->enable_per_layer_report && dimm->nr_pages) { if (n_labels >= EDAC_MAX_LABELS) { e->enable_per_layer_report = false; break; } n_labels++; if (p != e->label) { strcpy(p, OTHER_LABEL); p += strlen(OTHER_LABEL); } Loading @@ -1198,12 +1210,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } } if (!enable_per_layer_report) { strcpy(label, "any memory"); if (!e->enable_per_layer_report) { strcpy(e->label, "any memory"); } else { edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); if (p == label) strcpy(label, "unknown memory"); if (p == e->label) strcpy(e->label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { mci->csrows[row]->ce_count += error_count; Loading @@ -1216,7 +1228,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } /* Fill the RAM location data */ p = location; p = e->location; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) Loading @@ -1226,32 +1238,35 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } if (p > location) if (p > e->location) *(p - 1) = '\0'; /* Report the error via the trace interface */ grain_bits = fls_long(grain) + 1; trace_mc_event(type, msg, label, error_count, mci->mc_idx, top_layer, mid_layer, low_layer, PAGES_TO_MiB(page_frame_number) | offset_in_page, grain_bits, syndrome, other_detail); grain_bits = fls_long(e->grain) + 1; trace_mc_event(type, e->msg, e->label, e->error_count, mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, grain_bits, e->syndrome, other_detail); /* Memory type dependent details about the error */ if (type == HW_EVENT_ERR_CORRECTED) { snprintf(detail, sizeof(detail), "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", page_frame_number, offset_in_page, grain, syndrome); edac_ce_error(mci, error_count, pos, msg, location, label, detail, other_detail, enable_per_layer_report, page_frame_number, offset_in_page, grain); e->page_frame_number, e->offset_in_page, e->grain, e->syndrome); edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, detail, other_detail, e->enable_per_layer_report, e->page_frame_number, e->offset_in_page, e->grain); } else { snprintf(detail, sizeof(detail), "page:0x%lx offset:0x%lx grain:%ld", page_frame_number, offset_in_page, grain); page_frame_number, offset_in_page, e->grain); edac_ue_error(mci, error_count, pos, msg, location, label, detail, other_detail, enable_per_layer_report); edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, detail, other_detail, e->enable_per_layer_report); } } EXPORT_SYMBOL_GPL(edac_mc_handle_error); include/linux/edac.h +56 −0 Original line number Diff line number Diff line Loading @@ -47,8 +47,18 @@ static inline void opstate_init(void) return; } /* Max length of a DIMM label*/ #define EDAC_MC_LABEL_LEN 31 /* Maximum size of the location string */ #define LOCATION_SIZE 80 /* Defines the maximum number of labels that can be reported */ #define EDAC_MAX_LABELS 8 /* String used to join two or more labels */ #define OTHER_LABEL " or " /** * enum dev_type - describe the type of memory DRAM chips used at the stick * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it Loading Loading @@ -553,6 +563,46 @@ struct errcount_attribute_data { int layer0, layer1, layer2; }; /** * edac_raw_error_desc - Raw error report structure * @grain: minimum granularity for an error report, in bytes * @error_count: number of errors of the same type * @top_layer: top layer of the error (layer[0]) * @mid_layer: middle layer of the error (layer[1]) * @low_layer: low layer of the error (layer[2]) * @page_frame_number: page where the error happened * @offset_in_page: page offset * @syndrome: syndrome of the error (or 0 if unknown or if * the syndrome is not applicable) * @msg: error message * @location: location of the error * @label: label of the affected DIMM(s) * @other_detail: other driver-specific detail about the error * @enable_per_layer_report: if false, the error affects all layers * (typically, a memory controller error) */ struct edac_raw_error_desc { /* * NOTE: everything before grain won't be cleaned by * edac_raw_error_desc_clean() */ char location[LOCATION_SIZE]; char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; long grain; /* the vars below and grain will be cleaned on every new error report */ u16 error_count; int top_layer; int mid_layer; int low_layer; unsigned long page_frame_number; unsigned long offset_in_page; unsigned long syndrome; const char *msg; const char *other_detail; bool enable_per_layer_report; }; /* MEMORY controller information structure */ struct mem_ctl_info { Loading Loading @@ -660,6 +710,12 @@ struct mem_ctl_info { /* work struct for this MC */ struct delayed_work work; /* * Used to report an error - by being at the global struct * makes the memory allocated by the EDAC core */ struct edac_raw_error_desc error_desc; /* the internal state of this controller instance */ int op_state; Loading Loading
drivers/edac/edac_mc.c +48 −33 Original line number Diff line number Diff line Loading @@ -1065,7 +1065,6 @@ static void edac_ue_error(struct mem_ctl_info *mci, edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } #define OTHER_LABEL " or " /** * edac_mc_handle_error - reports a memory event to userspace Loading Loading @@ -1097,19 +1096,28 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { /* FIXME: too much for stack: move it to some pre-alocated area */ char detail[80], location[80]; char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; char detail[80]; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; int i; long grain; bool enable_per_layer_report = false; int i, n_labels = 0; u8 grain_bits; struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); /* Fills the error report buffer */ memset(e, 0, sizeof (*e)); e->error_count = error_count; e->top_layer = top_layer; e->mid_layer = mid_layer; e->low_layer = low_layer; e->page_frame_number = page_frame_number; e->offset_in_page = offset_in_page; e->syndrome = syndrome; e->msg = msg; e->other_detail = other_detail; /* * Check if the event report is consistent and if the memory * location is known. If it is known, enable_per_layer_report will be Loading @@ -1132,7 +1140,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) enable_per_layer_report = true; e->enable_per_layer_report = true; } /* Loading @@ -1146,8 +1154,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * where each memory belongs to a separate channel within the same * branch. */ grain = 0; p = label; p = e->label; *p = '\0'; for (i = 0; i < mci->tot_dimms; i++) { Loading @@ -1161,8 +1168,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, continue; /* get the max grain, over the error match range */ if (dimm->grain > grain) grain = dimm->grain; if (dimm->grain > e->grain) e->grain = dimm->grain; /* * If the error is memory-controller wide, there's no need to Loading @@ -1170,8 +1177,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ if (enable_per_layer_report && dimm->nr_pages) { if (p != label) { if (e->enable_per_layer_report && dimm->nr_pages) { if (n_labels >= EDAC_MAX_LABELS) { e->enable_per_layer_report = false; break; } n_labels++; if (p != e->label) { strcpy(p, OTHER_LABEL); p += strlen(OTHER_LABEL); } Loading @@ -1198,12 +1210,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } } if (!enable_per_layer_report) { strcpy(label, "any memory"); if (!e->enable_per_layer_report) { strcpy(e->label, "any memory"); } else { edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); if (p == label) strcpy(label, "unknown memory"); if (p == e->label) strcpy(e->label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { mci->csrows[row]->ce_count += error_count; Loading @@ -1216,7 +1228,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } /* Fill the RAM location data */ p = location; p = e->location; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) Loading @@ -1226,32 +1238,35 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } if (p > location) if (p > e->location) *(p - 1) = '\0'; /* Report the error via the trace interface */ grain_bits = fls_long(grain) + 1; trace_mc_event(type, msg, label, error_count, mci->mc_idx, top_layer, mid_layer, low_layer, PAGES_TO_MiB(page_frame_number) | offset_in_page, grain_bits, syndrome, other_detail); grain_bits = fls_long(e->grain) + 1; trace_mc_event(type, e->msg, e->label, e->error_count, mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, grain_bits, e->syndrome, other_detail); /* Memory type dependent details about the error */ if (type == HW_EVENT_ERR_CORRECTED) { snprintf(detail, sizeof(detail), "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", page_frame_number, offset_in_page, grain, syndrome); edac_ce_error(mci, error_count, pos, msg, location, label, detail, other_detail, enable_per_layer_report, page_frame_number, offset_in_page, grain); e->page_frame_number, e->offset_in_page, e->grain, e->syndrome); edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, detail, other_detail, e->enable_per_layer_report, e->page_frame_number, e->offset_in_page, e->grain); } else { snprintf(detail, sizeof(detail), "page:0x%lx offset:0x%lx grain:%ld", page_frame_number, offset_in_page, grain); page_frame_number, offset_in_page, e->grain); edac_ue_error(mci, error_count, pos, msg, location, label, detail, other_detail, enable_per_layer_report); edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, detail, other_detail, e->enable_per_layer_report); } } EXPORT_SYMBOL_GPL(edac_mc_handle_error);
include/linux/edac.h +56 −0 Original line number Diff line number Diff line Loading @@ -47,8 +47,18 @@ static inline void opstate_init(void) return; } /* Max length of a DIMM label*/ #define EDAC_MC_LABEL_LEN 31 /* Maximum size of the location string */ #define LOCATION_SIZE 80 /* Defines the maximum number of labels that can be reported */ #define EDAC_MAX_LABELS 8 /* String used to join two or more labels */ #define OTHER_LABEL " or " /** * enum dev_type - describe the type of memory DRAM chips used at the stick * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it Loading Loading @@ -553,6 +563,46 @@ struct errcount_attribute_data { int layer0, layer1, layer2; }; /** * edac_raw_error_desc - Raw error report structure * @grain: minimum granularity for an error report, in bytes * @error_count: number of errors of the same type * @top_layer: top layer of the error (layer[0]) * @mid_layer: middle layer of the error (layer[1]) * @low_layer: low layer of the error (layer[2]) * @page_frame_number: page where the error happened * @offset_in_page: page offset * @syndrome: syndrome of the error (or 0 if unknown or if * the syndrome is not applicable) * @msg: error message * @location: location of the error * @label: label of the affected DIMM(s) * @other_detail: other driver-specific detail about the error * @enable_per_layer_report: if false, the error affects all layers * (typically, a memory controller error) */ struct edac_raw_error_desc { /* * NOTE: everything before grain won't be cleaned by * edac_raw_error_desc_clean() */ char location[LOCATION_SIZE]; char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; long grain; /* the vars below and grain will be cleaned on every new error report */ u16 error_count; int top_layer; int mid_layer; int low_layer; unsigned long page_frame_number; unsigned long offset_in_page; unsigned long syndrome; const char *msg; const char *other_detail; bool enable_per_layer_report; }; /* MEMORY controller information structure */ struct mem_ctl_info { Loading Loading @@ -660,6 +710,12 @@ struct mem_ctl_info { /* work struct for this MC */ struct delayed_work work; /* * Used to report an error - by being at the global struct * makes the memory allocated by the EDAC core */ struct edac_raw_error_desc error_desc; /* the internal state of this controller instance */ int op_state; Loading