about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Zou Cao <zoucao.zc@alibaba-inc.com> 2018-05-13 08:54:09 +0800
committer Zou Cao <zoucao.zc@alibaba-inc.com> 2018-05-16 19:40:36 +0800
commit 18531c24d67cfa025fcc60723f5e4f2a07bb84ee (patch)
tree db6be57efdf876b192ef9a4a2dcf4722edb77d83
parent 0dcbe4abf39c6628d36f777a767e3dc7185a53dd (diff)
download mcelog-18531c24d67cfa025fcc60723f5e4f2a07bb84ee.tar.gz
transfer the page address to pre/post-sync-trigger scripts
Signed-off-by: Zou Cao <zoucao@linux.alibaba.com>
-rw-r--r-- memdb.c 10
-rw-r--r-- memdb.h 2
-rw-r--r-- page.c 24
-rw-r--r-- triggers/page-error-post-sync-soft-trigger 5
-rwxr-xr-x triggers/page-error-pre-sync-soft-trigger 5
5 files changed, 31 insertions, 15 deletions
diff --git a/memdb.c b/memdb.c
index 83ed50c..c2133a6 100644
--- a/memdb.c
+++ b/memdb.c
@@ -132,7 +132,7 @@ static char *format_location(struct memdimm *md)
/* Run a user defined trigger when a error threshold is crossed. */
void memdb_trigger(char *msg, struct memdimm *md, time_t t,
- struct err_type *et, struct bucket_conf *bc, bool sync)
+ struct err_type *et, struct bucket_conf *bc, char *args[], bool sync)
{
struct leaky_bucket *bucket = &et->bucket;
char *env[MAX_ENV];
@@ -172,7 +172,7 @@ void memdb_trigger(char *msg, struct memdimm *md, time_t t,
xasprintf(&env[ei++], "THRESHOLD_COUNT=%d", bucket->count);
env[ei] = NULL;
assert(ei < MAX_ENV);
- run_trigger(bc->trigger, NULL, env, sync);
+ run_trigger(bc->trigger, args, env, sync);
for (i = 0; i < ei; i++)
free(env[i]);
out:
@@ -194,7 +194,7 @@ account_over(struct err_triggers *t, struct memdimm *md, struct mce *m, unsigned
char *msg;
xasprintf(&msg, "Fallback %s memory error count %d exceeded threshold",
t->type, corr_err_cnt);
- memdb_trigger(msg, md, 0, &md->ce, &t->ce_bucket_conf, false);
+ memdb_trigger(msg, md, 0, &md->ce, &t->ce_bucket_conf, NULL, false);
free(msg);
}
}
@@ -211,11 +211,11 @@ account_memdb(struct err_triggers *t, struct memdimm *md, struct mce *m)
if (m->status & MCI_STATUS_UC) {
md->uc.count++;
if (__bucket_account(&t->uc_bucket_conf, &md->uc.bucket, 1, m->time))
- memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf, false);
+ memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf, NULL, false);
} else {
md->ce.count++;
if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, 1, m->time))
- memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf, false);
+ memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf, NULL, false);
}
free(msg);
}
diff --git a/memdb.h b/memdb.h
index 09ddd44..b495ad7 100644
--- a/memdb.h
+++ b/memdb.h
@@ -20,5 +20,5 @@ void memory_error(struct mce *m, int channel, int dimm, unsigned corr_err_cnt,
struct memdimm;
void memdb_trigger(char *msg, struct memdimm *md, time_t t,
- struct err_type *et, struct bucket_conf *bc, bool sync);
+ struct err_type *et, struct bucket_conf *bc, char *argv[], bool sync);
struct memdimm *get_memdimm(int socketid, int channel, int dimm, int insert);
diff --git a/page.c b/page.c
index f72db08..da10364 100644
--- a/page.c
+++ b/page.c
@@ -220,26 +220,40 @@ void account_page_error(struct mce *m, int channel, int dimm)
xasprintf(&msg, "Corrected memory errors on page %llx exceed threshold %s",
addr, thresh);
free(thresh);
- memdb_trigger(msg, md, t, &mp->ce, &page_trigger_conf, false);
+ memdb_trigger(msg, md, t, &mp->ce, &page_trigger_conf, NULL, false);
free(msg);
mp->triggered = 1;
if (offline == OFFLINE_SOFT || offline == OFFLINE_SOFT_THEN_HARD) {
struct bucket_conf page_soft_trigger_conf;
+ char *argv[] = {
+ NULL,
+ NULL,
+ NULL,
+ };
+ char *args;
+
+ asprintf(&args, "%lld", addr);
+ argv[0]=args;
memcpy(&page_soft_trigger_conf, &page_trigger_conf, sizeof(struct bucket_conf));
page_soft_trigger_conf.trigger = page_error_pre_soft_trigger;
- asprintf(&msg, "pre soft trigger run for page %llx", addr);
- memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, true);
+ argv[0]=page_error_pre_soft_trigger;
+ argv[1]=args;
+ asprintf(&msg, "pre soft trigger run for page %lld", addr);
+ memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true);
free(msg);
offline_action(mp, addr);
memcpy(&page_soft_trigger_conf, &page_trigger_conf, sizeof(struct bucket_conf));
page_soft_trigger_conf.trigger = page_error_post_soft_trigger;
- asprintf(&msg, "post soft trigger run for page %llx", addr);
- memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, true);
+ argv[0]=page_error_post_soft_trigger;
+ argv[1]=args;
+ asprintf(&msg, "post soft trigger run for page %lld", addr);
+ memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true);
free(msg);
+ free(args);
} else
offline_action(mp, addr);
diff --git a/triggers/page-error-post-sync-soft-trigger b/triggers/page-error-post-sync-soft-trigger
index e8e6ec6..f755216 100644
--- a/triggers/page-error-post-sync-soft-trigger
+++ b/triggers/page-error-post-sync-soft-trigger
@@ -18,6 +18,7 @@
# UCCOUNT Total uncorrected error count for DIMM
# LASTEVENT Time stamp of event that triggered threshold (in time_t format, seconds)
# THRESHOLD_COUNT Total umber of events in current threshold time period of specific type
+# ARGUMENTS:$1 the page address of soft offline
#
# note: will run as mcelog configured user
# this can be changed in mcelog.conf
@@ -25,14 +26,14 @@
logger -s -p daemon.err -t mcelog "$MESSAGE"
logger -s -p daemon.err -t mcelog "Location: $LOCATION"
-[ -x ./page-error-post-sync-soft-trigger.local ] && . ./page-error-post-sync-soft-trigger.local
+[ -x ./page-error-post-sync-soft-trigger.local ] && . ./page-error-post-sync-soft-trigger.local $1
if [ -d page-error-post-sync-soft-trigger.extern ]
then
ls page-error-post-sync-soft-trigger.extern |
while read item
do
- [ -x ./page-error-post-sync-soft-trigger.extern/$item ] && . ./page-error-post-sync-soft-trigger.extern/$item
+ [ -x ./page-error-post-sync-soft-trigger.extern/$item ] && . ./page-error-post-sync-soft-trigger.extern/$item $1
done
fi
diff --git a/triggers/page-error-pre-sync-soft-trigger b/triggers/page-error-pre-sync-soft-trigger
index 27269c6..fe43c91 100755
--- a/triggers/page-error-pre-sync-soft-trigger
+++ b/triggers/page-error-pre-sync-soft-trigger
@@ -18,6 +18,7 @@
# UCCOUNT Total uncorrected error count for DIMM
# LASTEVENT Time stamp of event that triggered threshold (in time_t format, seconds)
# THRESHOLD_COUNT Total umber of events in current threshold time period of specific type
+# ARGUMENTS:$1 the page address of soft offline
#
# note: will run as mcelog configured user
# this can be changed in mcelog.conf
@@ -25,14 +26,14 @@
logger -s -p daemon.err -t mcelog "$MESSAGE"
logger -s -p daemon.err -t mcelog "Location: $LOCATION"
-[ -x ./page-error-pre-soft-trigger.local ] && . ./page-error-pre-soft-trigger.local
+[ -x ./page-error-pre-soft-trigger.local ] && . ./page-error-pre-soft-trigger.local $1
if [ -d page-error-pre-sync-soft-trigger.extern ]
then
ls page-error-pre-sync-soft-trigger.extern |
while read item
do
- [ -x ./page-error-pre-sync-soft-trigger.extern/$item ] && . ./page-error-pre-sync-soft-trigger.extern/$item
+ [ -x ./page-error-pre-sync-soft-trigger.extern/$item ] && . ./page-error-pre-sync-soft-trigger.extern/$item $1
done
fi