diff options
author | Weihong Zhang <weihong.zhang@intel.com> | 2022-08-22 17:27:32 +0800 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2022-08-25 11:38:26 -0700 |
commit | 110cfb50bc86498a522619c68ba1cb10e141e5eb (patch) | |
tree | c96003af5057deb310922fd5ee1915beab9428d5 | |
parent | 11a5631c275b5642206d3d7dbf07555c04dc8799 (diff) | |
download | mce-test-110cfb50bc86498a522619c68ba1cb10e141e5eb.tar.gz |
edac.sh: Save more information to the log and add retries to the specified-address test
Update the code that writes the log in the EDAC test, and
add a retry loop to the specified-address test.
Signed-off-by: Jin Wen <wen.jin@intel.com>
Signed-off-by: Weihong Zhang <weihong.zhang@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rwxr-xr-x | cases/function/edac/edac.sh | 61 |
1 files changed, 42 insertions, 19 deletions
diff --git a/cases/function/edac/edac.sh b/cases/function/edac/edac.sh index eb80e43..c1f6d09 100755 --- a/cases/function/edac/edac.sh +++ b/cases/function/edac/edac.sh @@ -208,11 +208,30 @@ inject_lot_ce() save_memconf } +write_log() +{ + echo "-----------------------" >> $LOG_FILE + printf "0x%016lx %s\n" $1 $2 | tee -a $LOG_FILE + echo "-----------------------" >> $LOG_FILE + echo -e "\nEDAC message expected in reference file:\n" >> $LOG_FILE + echo -e "$3\n" >> $LOG_FILE + echo -e "\nEDAC messages actually obtained from dmesg:\n" >> $LOG_FILE + dmesg -c >> $LOG_FILE + echo >> $LOG_FILE +} + +# On some platforms(e.g., clx-4s), there is actually no 'All' option value can +# be available for 'Correct Error Threshold' BIOS setup option, which can be set +# as the minimum value(<=5). EDAC messages for some addresses may be lost during +# test. To work around it, when one address test fail, we retry 5 times, if the +# EDAC message for this address is still lost, we think this address test fail. test_spec_addr() { + local retry_cnt=5 local addr dmesg -c &> /dev/null + # inject memory CE to spec address from reference file echo $ERR_TYPE > $EINJ_IF/error_type echo 0xfffffffffffff000 > $EINJ_IF/param2 echo 0x0 > $EINJ_IF/notrigger @@ -223,13 +242,24 @@ test_spec_addr() echo "$line" | grep -q EDAC [ $? -ne 0 ] && continue addr=$(echo "$line" | grep -o "page:0x[a-f0-9]*" | cut -d':' -f2)"000" - #printf "addr=0x%x\n" $addr - echo $addr > $EINJ_IF/param1 - echo 1 > $EINJ_IF/error_inject - # add engough delay to get full kernel message - sleep 0.5 - #check the new kernel message with ref message - check_result "$line" + for retry in `seq 1 ${retry_cnt}` + do + echo $addr > $EINJ_IF/param1 + echo 1 > $EINJ_IF/error_inject + # Add enough delay to get full kernel message + # and avoid CE error count accumulation on same address. 
+ # otherwise need to handle multi-error count cases in check_result + sleep 1 + # check the new kernel message with error message in reference file + check_result "$line" + if [ $? -eq 0 ]; then + write_log $addr "PASS" "$line" + break + elif [ "$retry" -eq "${retry_cnt}" ]; then + let "COUNT_FAIL += 1" + write_log $addr "FAIL" "$line" + fi + done done < $EDAC_REF_FILE } @@ -238,7 +268,7 @@ check_result() local addr local tmpstr local edac_str - + local ret=0 addr=$(echo "$@" | grep -o "page:0x[a-f0-9]*" | cut -d':' -f2)"000" tmpstr="$@" # remove timestamp in head of each line @@ -246,20 +276,13 @@ check_result() dmesg | grep -q "$edac_str" if [ $? -ne 0 ]; then # re-check it to avoid later coming message - sleep 1 dmesg | grep -q "$edac_str" - if [ $? -eq 0 ]; then - printf "0x%016lx PASS\n" $addr | tee -a $LOG_FILE - else - printf "0x%016lx FAIL\n" $addr | tee -a $LOG_FILE - let "COUNT_FAIL += 1" + if [ $? -ne 0 ]; then + ret=1 fi - else - printf "0x%016lx PASS\n" $addr | tee -a $LOG_FILE fi - echo -e "\nEDAC dmesg output as below:\n" >> $LOG_FILE - dmesg -c >> $LOG_FILE - echo >> $LOG_FILE + + return $ret } check_mem_conf() |