diff options
| field | value | date |
|---|---|---|
| author | Andi Kleen <github@halobates.de> | 2020-05-27 09:16:43 -0700 |
| committer | GitHub <noreply@github.com> | 2020-05-27 09:16:43 -0700 |
| commit | 78f5d82590e905f58c7d14fcdb0f5094123c1cdd (patch) | |
| tree | 8d3b870c82a3481dac8c25467637b65e004b91b9 | |
| parent | 06d65d96432b16cdea2d25ec1010c9e8c4e226a0 (diff) | |
| parent | 7be8e230537b425f8c4047476747b223bd0d60d6 (diff) | |
| download | mcelog-78f5d82590e905f58c7d14fcdb0f5094123c1cdd.tar.gz | |
Merge pull request #79 from JiriDluhosRH/master
Proposal: add information about which path reported a MCE event
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | bus.c | 4 |
| -rw-r--r-- | dimm.c | 2 |
| -rw-r--r-- | memdb.c | 21 |
| -rw-r--r-- | memdb.h | 3 |
| -rw-r--r-- | page.c | 6 |
| -rwxr-xr-x | tests/test | 2 |
| -rw-r--r-- | trigger.c | 4 |
| -rw-r--r-- | trigger.h | 2 |
| -rw-r--r-- | unknown.c | 2 |
| -rw-r--r-- | yellow.c | 2 |
10 files changed, 25 insertions, 23 deletions
@@ -82,7 +82,7 @@ void run_bus_trigger(int socket, int cpu, char *level, char *pp, char *rrrr, env[ei] = NULL; assert(ei < MAX_ENV); - run_trigger(bus_trigger, NULL, env, false); + run_trigger(bus_trigger, NULL, env, false, "bus"); for (i = 0; i < ei; i++) free(env[i]); free(msg); @@ -119,7 +119,7 @@ void run_iomca_trigger(int socket, int cpu, int seg, int bus, int dev, int fn) env[ei] = NULL; assert(ei < MAX_ENV); - run_trigger(iomca_trigger, NULL, env, false); + run_trigger(iomca_trigger, NULL, env, false, "iomca"); for (i = 0; i < ei; i++) free(env[i]); free(msg); @@ -374,7 +374,7 @@ void new_error(unsigned long long addr, unsigned long max_error, char *trigger) Lprintf("Large number of corrected errors in memory at %s", loc); Lprintf("Consider replacing it"); if (trigger && trigger[0]) - run_trigger(trigger, loc, val, max_error, false); + run_trigger(trigger, loc, val, max_error, false, "dimm"); } } free(devs); @@ -132,7 +132,8 @@ static char *format_location(struct memdimm *md) /* Run a user defined trigger when a error threshold is crossed. */ void memdb_trigger(char *msg, struct memdimm *md, time_t t, - struct err_type *et, struct bucket_conf *bc, char *args[], bool sync) + struct err_type *et, struct bucket_conf *bc, char *args[], bool sync, + const char* reporter) { struct leaky_bucket *bucket = &et->bucket; char *env[MAX_ENV]; @@ -172,7 +173,7 @@ void memdb_trigger(char *msg, struct memdimm *md, time_t t, xasprintf(&env[ei++], "THRESHOLD_COUNT=%d", bucket->count); env[ei] = NULL; assert(ei < MAX_ENV); - run_trigger(bc->trigger, args, env, sync); + run_trigger(bc->trigger, args, env, sync, reporter); for (i = 0; i < ei; i++) free(env[i]); out: @@ -186,7 +187,7 @@ out: * we have no clues where they are. 
*/ static void -account_over(struct err_triggers *t, struct memdimm *md, struct mce *m, unsigned corr_err_cnt) +account_over(struct err_triggers *t, struct memdimm *md, struct mce *m, unsigned corr_err_cnt, const char* reporter) { if (corr_err_cnt && --corr_err_cnt > 0) { md->ce.count += corr_err_cnt; @@ -194,14 +195,14 @@ account_over(struct err_triggers *t, struct memdimm *md, struct mce *m, unsigned char *msg; xasprintf(&msg, "Fallback %s memory error count %d exceeded threshold", t->type, corr_err_cnt); - memdb_trigger(msg, md, 0, &md->ce, &t->ce_bucket_conf, NULL, false); + memdb_trigger(msg, md, 0, &md->ce, &t->ce_bucket_conf, NULL, false, reporter); free(msg); } } } static void -account_memdb(struct err_triggers *t, struct memdimm *md, struct mce *m) +account_memdb(struct err_triggers *t, struct memdimm *md, struct mce *m, const char* reporter) { char *msg; @@ -211,11 +212,11 @@ account_memdb(struct err_triggers *t, struct memdimm *md, struct mce *m) if (m->status & MCI_STATUS_UC) { md->uc.count++; if (__bucket_account(&t->uc_bucket_conf, &md->uc.bucket, 1, m->time)) - memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf, NULL, false); + memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf, NULL, false, reporter); } else { md->ce.count++; if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, 1, m->time)) - memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf, NULL, false); + memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf, NULL, false, reporter); } free(msg); } @@ -241,13 +242,13 @@ void memory_error(struct mce *m, int ch, int dimm, unsigned corr_err_cnt, if (memdb_enabled && (ch != -1 || dimm != -1)) { md = get_memdimm(m->socketid, ch, dimm, 1); - account_memdb(&dimms, md, m); + account_memdb(&dimms, md, m, "memdb"); } if (sockdb_enabled) { md = get_memdimm(m->socketid, -1, -1, 1); - account_over(&sockets, md, m, corr_err_cnt); - account_memdb(&sockets, md, m); + account_over(&sockets, md, m, corr_err_cnt, 
"sockdb_fallback"); + account_memdb(&sockets, md, m, "sockdb_memdb"); } } @@ -20,5 +20,6 @@ void memory_error(struct mce *m, int channel, int dimm, unsigned corr_err_cnt, struct memdimm; void memdb_trigger(char *msg, struct memdimm *md, time_t t, - struct err_type *et, struct bucket_conf *bc, char *argv[], bool sync); + struct err_type *et, struct bucket_conf *bc, char *argv[], bool sync, + const char* reporter); struct memdimm *get_memdimm(int socketid, int channel, int dimm, int insert); @@ -220,7 +220,7 @@ void account_page_error(struct mce *m, int channel, int dimm) xasprintf(&msg, "Corrected memory errors on page %llx exceed threshold %s", addr, thresh); free(thresh); - memdb_trigger(msg, md, t, &mp->ce, &page_trigger_conf, NULL, false); + memdb_trigger(msg, md, t, &mp->ce, &page_trigger_conf, NULL, false, "page"); free(msg); mp->triggered = 1; @@ -241,7 +241,7 @@ void account_page_error(struct mce *m, int channel, int dimm) argv[0]=page_error_pre_soft_trigger; argv[1]=args; asprintf(&msg, "pre soft trigger run for page %lld", addr); - memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true); + memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true, "page_pre_soft"); free(msg); offline_action(mp, addr); @@ -251,7 +251,7 @@ void account_page_error(struct mce *m, int channel, int dimm) argv[0]=page_error_post_soft_trigger; argv[1]=args; asprintf(&msg, "post soft trigger run for page %lld", addr); - memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true); + memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true, "page_post_soft"); free(msg); free(args); @@ -61,7 +61,7 @@ do if [ "$NUMT" != "" ] ; then if [ "$NUMC" != "$NUMT" ] ; then - echo "$conf: triggers did not trigger as expected: $NUMT != $NUMC" >> results + echo "$conf: triggers did not trigger as expected: expected $NUMT, got $NUMC" >> results else echo "$conf: triggers trigger as expected" >> results fi @@ -61,7 +61,7 @@ pid_t 
mcelog_fork(const char *name) } // note: trigger must be allocated, e.g. from config -void run_trigger(char *trigger, char *argv[], char **env, bool sync) +void run_trigger(char *trigger, char *argv[], char **env, bool sync, const char* reporter) { pid_t child; @@ -73,7 +73,7 @@ void run_trigger(char *trigger, char *argv[], char **env, bool sync) if (!argv) argv = fallback_argv; - Lprintf("Running trigger `%s'\n", trigger); + Lprintf("Running trigger `%s' (reporter: %s)\n", trigger, reporter); if (children_max > 0 && num_children >= children_max) { Eprintf("Too many trigger children running already\n"); return; @@ -2,7 +2,7 @@ #define __TRIGGER_H__ #include <stdbool.h> -void run_trigger(char *trigger, char *argv[], char **env, bool sync); +void run_trigger(char *trigger, char *argv[], char **env, bool sync, const char* reporter); void trigger_setup(void); void trigger_wait(void); int trigger_check(char *); @@ -73,7 +73,7 @@ void run_unknown_trigger(int socket, int cpu, struct mce *log) env[ei] = NULL; assert(ei < MAX_ENV); - run_trigger(unknown_trigger, NULL, env, false); + run_trigger(unknown_trigger, NULL, env, false, "unknown"); for (i = 0; i < ei; i++) free(env[i]); free(msg); @@ -95,7 +95,7 @@ void run_yellow_trigger(int cpu, int tnum, int lnum, char *ts, char *ls, int soc env[ei] = NULL; assert(ei < MAX_ENV); - run_trigger(yellow_trigger, NULL, env, false); + run_trigger(yellow_trigger, NULL, env, false, "yellow"); for (i = 0; i < ei; i++) free(env[i]); out: |