about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Andi Kleen <github@halobates.de> 2020-05-27 09:16:43 -0700
committer GitHub <noreply@github.com> 2020-05-27 09:16:43 -0700
commit 78f5d82590e905f58c7d14fcdb0f5094123c1cdd (patch)
tree 8d3b870c82a3481dac8c25467637b65e004b91b9
parent 06d65d96432b16cdea2d25ec1010c9e8c4e226a0 (diff)
parent 7be8e230537b425f8c4047476747b223bd0d60d6 (diff)
download mcelog-78f5d82590e905f58c7d14fcdb0f5094123c1cdd.tar.gz
Merge pull request #79 from JiriDluhosRH/master
Proposal: add information about which path reported a MCE event
-rw-r--r-- bus.c 4
-rw-r--r-- dimm.c 2
-rw-r--r-- memdb.c 21
-rw-r--r-- memdb.h 3
-rw-r--r-- page.c 6
-rwxr-xr-x tests/test 2
-rw-r--r-- trigger.c 4
-rw-r--r-- trigger.h 2
-rw-r--r-- unknown.c 2
-rw-r--r-- yellow.c 2
10 files changed, 25 insertions, 23 deletions
diff --git a/bus.c b/bus.c
index df56dc7..20b4741 100644
--- a/bus.c
+++ b/bus.c
@@ -82,7 +82,7 @@ void run_bus_trigger(int socket, int cpu, char *level, char *pp, char *rrrr,
env[ei] = NULL;
assert(ei < MAX_ENV);
- run_trigger(bus_trigger, NULL, env, false);
+ run_trigger(bus_trigger, NULL, env, false, "bus");
for (i = 0; i < ei; i++)
free(env[i]);
free(msg);
@@ -119,7 +119,7 @@ void run_iomca_trigger(int socket, int cpu, int seg, int bus, int dev, int fn)
env[ei] = NULL;
assert(ei < MAX_ENV);
- run_trigger(iomca_trigger, NULL, env, false);
+ run_trigger(iomca_trigger, NULL, env, false, "iomca");
for (i = 0; i < ei; i++)
free(env[i]);
free(msg);
diff --git a/dimm.c b/dimm.c
index 26d0118..7edb6b3 100644
--- a/dimm.c
+++ b/dimm.c
@@ -374,7 +374,7 @@ void new_error(unsigned long long addr, unsigned long max_error, char *trigger)
Lprintf("Large number of corrected errors in memory at %s", loc);
Lprintf("Consider replacing it");
if (trigger && trigger[0])
- run_trigger(trigger, loc, val, max_error, false);
+ run_trigger(trigger, loc, val, max_error, false, "dimm");
}
}
free(devs);
diff --git a/memdb.c b/memdb.c
index c2133a6..8dfe15d 100644
--- a/memdb.c
+++ b/memdb.c
@@ -132,7 +132,8 @@ static char *format_location(struct memdimm *md)
/* Run a user defined trigger when a error threshold is crossed. */
void memdb_trigger(char *msg, struct memdimm *md, time_t t,
- struct err_type *et, struct bucket_conf *bc, char *args[], bool sync)
+ struct err_type *et, struct bucket_conf *bc, char *args[], bool sync,
+ const char* reporter)
{
struct leaky_bucket *bucket = &et->bucket;
char *env[MAX_ENV];
@@ -172,7 +173,7 @@ void memdb_trigger(char *msg, struct memdimm *md, time_t t,
xasprintf(&env[ei++], "THRESHOLD_COUNT=%d", bucket->count);
env[ei] = NULL;
assert(ei < MAX_ENV);
- run_trigger(bc->trigger, args, env, sync);
+ run_trigger(bc->trigger, args, env, sync, reporter);
for (i = 0; i < ei; i++)
free(env[i]);
out:
@@ -186,7 +187,7 @@ out:
* we have no clues where they are.
*/
static void
-account_over(struct err_triggers *t, struct memdimm *md, struct mce *m, unsigned corr_err_cnt)
+account_over(struct err_triggers *t, struct memdimm *md, struct mce *m, unsigned corr_err_cnt, const char* reporter)
{
if (corr_err_cnt && --corr_err_cnt > 0) {
md->ce.count += corr_err_cnt;
@@ -194,14 +195,14 @@ account_over(struct err_triggers *t, struct memdimm *md, struct mce *m, unsigned
char *msg;
xasprintf(&msg, "Fallback %s memory error count %d exceeded threshold",
t->type, corr_err_cnt);
- memdb_trigger(msg, md, 0, &md->ce, &t->ce_bucket_conf, NULL, false);
+ memdb_trigger(msg, md, 0, &md->ce, &t->ce_bucket_conf, NULL, false, reporter);
free(msg);
}
}
}
static void
-account_memdb(struct err_triggers *t, struct memdimm *md, struct mce *m)
+account_memdb(struct err_triggers *t, struct memdimm *md, struct mce *m, const char* reporter)
{
char *msg;
@@ -211,11 +212,11 @@ account_memdb(struct err_triggers *t, struct memdimm *md, struct mce *m)
if (m->status & MCI_STATUS_UC) {
md->uc.count++;
if (__bucket_account(&t->uc_bucket_conf, &md->uc.bucket, 1, m->time))
- memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf, NULL, false);
+ memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf, NULL, false, reporter);
} else {
md->ce.count++;
if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, 1, m->time))
- memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf, NULL, false);
+ memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf, NULL, false, reporter);
}
free(msg);
}
@@ -241,13 +242,13 @@ void memory_error(struct mce *m, int ch, int dimm, unsigned corr_err_cnt,
if (memdb_enabled && (ch != -1 || dimm != -1)) {
md = get_memdimm(m->socketid, ch, dimm, 1);
- account_memdb(&dimms, md, m);
+ account_memdb(&dimms, md, m, "memdb");
}
if (sockdb_enabled) {
md = get_memdimm(m->socketid, -1, -1, 1);
- account_over(&sockets, md, m, corr_err_cnt);
- account_memdb(&sockets, md, m);
+ account_over(&sockets, md, m, corr_err_cnt, "sockdb_fallback");
+ account_memdb(&sockets, md, m, "sockdb_memdb");
}
}
diff --git a/memdb.h b/memdb.h
index b495ad7..74151f5 100644
--- a/memdb.h
+++ b/memdb.h
@@ -20,5 +20,6 @@ void memory_error(struct mce *m, int channel, int dimm, unsigned corr_err_cnt,
struct memdimm;
void memdb_trigger(char *msg, struct memdimm *md, time_t t,
- struct err_type *et, struct bucket_conf *bc, char *argv[], bool sync);
+ struct err_type *et, struct bucket_conf *bc, char *argv[], bool sync,
+ const char* reporter);
struct memdimm *get_memdimm(int socketid, int channel, int dimm, int insert);
diff --git a/page.c b/page.c
index da10364..59b2204 100644
--- a/page.c
+++ b/page.c
@@ -220,7 +220,7 @@ void account_page_error(struct mce *m, int channel, int dimm)
xasprintf(&msg, "Corrected memory errors on page %llx exceed threshold %s",
addr, thresh);
free(thresh);
- memdb_trigger(msg, md, t, &mp->ce, &page_trigger_conf, NULL, false);
+ memdb_trigger(msg, md, t, &mp->ce, &page_trigger_conf, NULL, false, "page");
free(msg);
mp->triggered = 1;
@@ -241,7 +241,7 @@ void account_page_error(struct mce *m, int channel, int dimm)
argv[0]=page_error_pre_soft_trigger;
argv[1]=args;
asprintf(&msg, "pre soft trigger run for page %lld", addr);
- memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true);
+ memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true, "page_pre_soft");
free(msg);
offline_action(mp, addr);
@@ -251,7 +251,7 @@ void account_page_error(struct mce *m, int channel, int dimm)
argv[0]=page_error_post_soft_trigger;
argv[1]=args;
asprintf(&msg, "post soft trigger run for page %lld", addr);
- memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true);
+ memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true, "page_post_soft");
free(msg);
free(args);
diff --git a/tests/test b/tests/test
index 148bf1f..9623f4a 100755
--- a/tests/test
+++ b/tests/test
@@ -61,7 +61,7 @@ do
if [ "$NUMT" != "" ] ; then
if [ "$NUMC" != "$NUMT" ] ; then
- echo "$conf: triggers did not trigger as expected: $NUMT != $NUMC" >> results
+ echo "$conf: triggers did not trigger as expected: expected $NUMT, got $NUMC" >> results
else
echo "$conf: triggers trigger as expected" >> results
fi
diff --git a/trigger.c b/trigger.c
index a46b23f..6480c58 100644
--- a/trigger.c
+++ b/trigger.c
@@ -61,7 +61,7 @@ pid_t mcelog_fork(const char *name)
}
// note: trigger must be allocated, e.g. from config
-void run_trigger(char *trigger, char *argv[], char **env, bool sync)
+void run_trigger(char *trigger, char *argv[], char **env, bool sync, const char* reporter)
{
pid_t child;
@@ -73,7 +73,7 @@ void run_trigger(char *trigger, char *argv[], char **env, bool sync)
if (!argv)
argv = fallback_argv;
- Lprintf("Running trigger `%s'\n", trigger);
+ Lprintf("Running trigger `%s' (reporter: %s)\n", trigger, reporter);
if (children_max > 0 && num_children >= children_max) {
Eprintf("Too many trigger children running already\n");
return;
diff --git a/trigger.h b/trigger.h
index f377506..b9c806f 100644
--- a/trigger.h
+++ b/trigger.h
@@ -2,7 +2,7 @@
#define __TRIGGER_H__
#include <stdbool.h>
-void run_trigger(char *trigger, char *argv[], char **env, bool sync);
+void run_trigger(char *trigger, char *argv[], char **env, bool sync, const char* reporter);
void trigger_setup(void);
void trigger_wait(void);
int trigger_check(char *);
diff --git a/unknown.c b/unknown.c
index d2c0627..12feac8 100644
--- a/unknown.c
+++ b/unknown.c
@@ -73,7 +73,7 @@ void run_unknown_trigger(int socket, int cpu, struct mce *log)
env[ei] = NULL;
assert(ei < MAX_ENV);
- run_trigger(unknown_trigger, NULL, env, false);
+ run_trigger(unknown_trigger, NULL, env, false, "unknown");
for (i = 0; i < ei; i++)
free(env[i]);
free(msg);
diff --git a/yellow.c b/yellow.c
index a077943..3616a8b 100644
--- a/yellow.c
+++ b/yellow.c
@@ -95,7 +95,7 @@ void run_yellow_trigger(int cpu, int tnum, int lnum, char *ts, char *ls, int soc
env[ei] = NULL;
assert(ei < MAX_ENV);
- run_trigger(yellow_trigger, NULL, env, false);
+ run_trigger(yellow_trigger, NULL, env, false, "yellow");
for (i = 0; i < ei; i++)
free(env[i]);
out: