aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWeihong Zhang <weihong.zhang@intel.com>2022-08-22 17:27:32 +0800
committerTony Luck <tony.luck@intel.com>2022-08-25 11:38:26 -0700
commit110cfb50bc86498a522619c68ba1cb10e141e5eb (patch)
treec96003af5057deb310922fd5ee1915beab9428d5
parent11a5631c275b5642206d3d7dbf07555c04dc8799 (diff)
downloadmce-test-110cfb50bc86498a522619c68ba1cb10e141e5eb.tar.gz
edac.sh: Save more information to the log and add retries to the specified-address test
Update the code that writes the log in the EDAC test, and add retries to the specified-address test. Signed-off-by: Jin Wen <wen.jin@intel.com> Signed-off-by: Weihong Zhang <weihong.zhang@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rwxr-xr-xcases/function/edac/edac.sh61
1 files changed, 42 insertions, 19 deletions
diff --git a/cases/function/edac/edac.sh b/cases/function/edac/edac.sh
index eb80e43..c1f6d09 100755
--- a/cases/function/edac/edac.sh
+++ b/cases/function/edac/edac.sh
@@ -208,11 +208,30 @@ inject_lot_ce()
save_memconf
}
+write_log()
+{
+ echo "-----------------------" >> $LOG_FILE
+ printf "0x%016lx %s\n" $1 $2 | tee -a $LOG_FILE
+ echo "-----------------------" >> $LOG_FILE
+ echo -e "\nEDAC message expected in reference file:\n" >> $LOG_FILE
+ echo -e "$3\n" >> $LOG_FILE
+ echo -e "\nEDAC messages actually obtained from dmesg:\n" >> $LOG_FILE
+ dmesg -c >> $LOG_FILE
+ echo >> $LOG_FILE
+}
+
+# On some platforms (e.g., clx-4s), the 'Correct Error Threshold' BIOS setup
+# option offers no 'All' value; the lowest setting available is a minimum value
+# (<=5), so EDAC messages for some addresses may be lost during the test. As a
+# workaround, each address is tried up to 5 times; if its EDAC message is still
+# missing after the last attempt, the test for that address is counted as failed.
test_spec_addr()
{
+ local retry_cnt=5
local addr
dmesg -c &> /dev/null
+ # inject memory CE to spec address from reference file
echo $ERR_TYPE > $EINJ_IF/error_type
echo 0xfffffffffffff000 > $EINJ_IF/param2
echo 0x0 > $EINJ_IF/notrigger
@@ -223,13 +242,24 @@ test_spec_addr()
echo "$line" | grep -q EDAC
[ $? -ne 0 ] && continue
addr=$(echo "$line" | grep -o "page:0x[a-f0-9]*" | cut -d':' -f2)"000"
- #printf "addr=0x%x\n" $addr
- echo $addr > $EINJ_IF/param1
- echo 1 > $EINJ_IF/error_inject
- # add engough delay to get full kernel message
- sleep 0.5
- #check the new kernel message with ref message
- check_result "$line"
+ for retry in `seq 1 ${retry_cnt}`
+ do
+ echo $addr > $EINJ_IF/param1
+ echo 1 > $EINJ_IF/error_inject
+ # Add enough delay to get full kernel message
+ # and avoid CE error count accumulation on same address.
+ # otherwise need to handle multi-error count cases in check_result
+ sleep 1
+ # check the new kernel message with error message in reference file
+ check_result "$line"
+ if [ $? -eq 0 ]; then
+ write_log $addr "PASS" "$line"
+ break
+ elif [ "$retry" -eq "${retry_cnt}" ]; then
+ let "COUNT_FAIL += 1"
+ write_log $addr "FAIL" "$line"
+ fi
+ done
done < $EDAC_REF_FILE
}
@@ -238,7 +268,7 @@ check_result()
local addr
local tmpstr
local edac_str
-
+ local ret=0
addr=$(echo "$@" | grep -o "page:0x[a-f0-9]*" | cut -d':' -f2)"000"
tmpstr="$@"
# remove timestamp in head of each line
@@ -246,20 +276,13 @@ check_result()
dmesg | grep -q "$edac_str"
if [ $? -ne 0 ]; then
# re-check it to avoid later coming message
- sleep 1
dmesg | grep -q "$edac_str"
- if [ $? -eq 0 ]; then
- printf "0x%016lx PASS\n" $addr | tee -a $LOG_FILE
- else
- printf "0x%016lx FAIL\n" $addr | tee -a $LOG_FILE
- let "COUNT_FAIL += 1"
+ if [ $? -ne 0 ]; then
+ ret=1
fi
- else
- printf "0x%016lx PASS\n" $addr | tee -a $LOG_FILE
fi
- echo -e "\nEDAC dmesg output as below:\n" >> $LOG_FILE
- dmesg -c >> $LOG_FILE
- echo >> $LOG_FILE
+
+ return $ret
}
check_mem_conf()