author     Theodore Ts'o <tytso@mit.edu>   2023-07-01 00:51:06 -0400
committer  Theodore Ts'o <tytso@mit.edu>   2023-07-01 00:51:06 -0400
commit     df7a7d1c5d784d9b1ef539b756357cf10d4f105f (patch)
tree       f9741ec2ed69b75a2986a7833ba3fcac5ca3946c
parent     0f389da9f980e0bb51a45c7d464bf2b0edc09fdd (diff)
parent     9e67ddbbede1eb9fc766fc2c2e642a890a4db915 (diff)
Merge remote-tracking branch 'leah/ltm-auto-resume'
-rw-r--r--   fstests-bld/misc/syncfs.c                                                     |   7
-rwxr-xr-x   run-fstests/gce-xfstests                                                      |  41
-rw-r--r--   run-fstests/util/gce-ltm-funcs                                                |  12
-rw-r--r--   run-fstests/util/get-config                                                   |  10
-rw-r--r--   run-fstests/util/parse_cli                                                    |  11
-rwxr-xr-x   test-appliance/files/root/runtests.sh                                         |  32
-rw-r--r--   test-appliance/files/usr/lib/python3/dist-packages/diff_stats.py              | 105
-rw-r--r--   test-appliance/files/usr/lib/python3/dist-packages/gen_results_summary.py     |  54
-rw-r--r--   test-appliance/files/usr/lib/python3/dist-packages/get_stats.py               |  68
-rw-r--r--   test-appliance/files/usr/lib/python3/dist-packages/junitparser/__init__.py    |   1
-rw-r--r--   test-appliance/files/usr/lib/python3/dist-packages/junitparser/junitparser.py |   6
-rw-r--r--   test-appliance/files/usr/lib/python3/dist-packages/merge_stats.py             |  45
-rwxr-xr-x   test-appliance/files/usr/local/bin/add_error_xunit                            |  48
-rwxr-xr-x   test-appliance/files/usr/local/lib/gce-add-metadata                           |   2
-rwxr-xr-x   test-appliance/files/usr/local/lib/gce-logger                                 |  27
-rw-r--r--   test-appliance/files/usr/local/lib/gce-server/ltm/shard.go                    |  59
-rw-r--r--   test-appliance/files/usr/local/lib/gce-server/ltm/sharder.go                  |  41
-rw-r--r--   test-appliance/files/usr/local/lib/gce-server/util/gcp/gcp.go                 |   7
-rw-r--r--   test-appliance/files/usr/local/lib/gce-server/util/parser/parser.go           |   1
-rw-r--r--   test-appliance/files/usr/local/lib/gce-server/util/server/server.go           |  29
-rw-r--r--   test-appliance/gce-xfstests-bld.sh                                            |   2
21 files changed, 516 insertions(+), 92 deletions(-)
diff --git a/fstests-bld/misc/syncfs.c b/fstests-bld/misc/syncfs.c
index 6bb8a9ae..60e52f24 100644
--- a/fstests-bld/misc/syncfs.c
+++ b/fstests-bld/misc/syncfs.c
@@ -1,5 +1,5 @@
/*
- * syncfs.c -- issue
+ * syncfs.c -- issue syncfs on a file or directory
*/
#define _GNU_SOURCE
@@ -16,14 +16,14 @@ const char *progname;
static void usage(void)
{
- fprintf(stderr, "Usage: %s <file>\n");
+ fprintf(stderr, "Usage: %s <file>\n", progname);
exit(1);
}
int main(int argc, char **argv)
{
int fd;
-
+
progname = argv[0];
if (argc != 2)
usage();
@@ -38,4 +38,3 @@ int main(int argc, char **argv)
}
return 0;
}
-
diff --git a/run-fstests/gce-xfstests b/run-fstests/gce-xfstests
index d3732128..4373813a 100755
--- a/run-fstests/gce-xfstests
+++ b/run-fstests/gce-xfstests
@@ -3,6 +3,7 @@
XFSTESTS_FLAVOR=gce
RUN_ON_LTM=
RUN_ON_KCS=
+GCE_IMAGE_PROJECT=
t=$(echo ${XFSTESTS_FLAVOR}_xfstests_dir | tr "[:lower:]" "[:upper:]")
eval DIR="\$$t"
if test -z "$DIR"
@@ -232,6 +233,14 @@ case "$1" in
deldisks="--delete-disks all"
fi
shift
+
+ bg="&"
+ if test "$1" = "--wait"
+ then
+ bg=
+ shift
+ fi
+
for i in "$@"
do
if test -n "$deldisks"
@@ -244,8 +253,8 @@ case "$1" in
run_gcloud compute -q instances add-metadata "$i" \
--metadata "shutdown_reason=$reason" \
--zone "$zone" > /dev/null
- run_gcloud compute -q instances delete "$i" \
- --zone "$zone" $deldisks &
+ eval run_gcloud compute -q instances delete "$i" \
+ --zone "$zone" $deldisks $bg
done
exit 0
;;
@@ -408,13 +417,20 @@ case "$1" in
exit $?
;;
ssh)
+ # gce-xfstests ssh --user <user> <host> -- <cmd>
user=root
shift
while (( $# >= 1 )); do
case $1 in
--user|-u) shift
user="$1"
+ echo "user=$user"
;;
+ --) shift
+ ssh_cmd="$@"
+ CMD="--command="
+ break
+ ;;
-*) echo "Unknown option $1"
exit 1
;;
@@ -423,8 +439,17 @@ case "$1" in
esac
shift
done
- run_gcloud compute -q ssh $user@"$host" \
- --zone $(get_gce_zone "$host") $RUN_INTERNAL
+
+	# ssh_cmd must be quoted, but passing "" when there is no command
+	# causes gcloud to complain (even if --command= is lumped into ssh_cmd)
+ if test -n "$ssh_cmd"; then
+ run_gcloud compute -q ssh $user@"$host" \
+ --zone $(get_gce_zone $host) $RUN_INTERNAL --command="$ssh_cmd"
+ exit $?
+ fi
+
+ run_gcloud compute -q ssh $user@"$host" \
+ --zone $(get_gce_zone $host) $RUN_INTERNAL
exit $?
;;
scp)
@@ -1148,7 +1173,9 @@ fi
if test -n "$RUN_ON_LTM"; then
. "$DIR/util/gce-ltm-funcs"
- send_to_ltm $ORIG_CMDLINE_B64
+ if ! send_to_ltm $ORIG_CMDLINE_B64; then
+ exit 1
+ fi
exit 0
elif test -n "$RUN_ON_KCS"; then
if ! gsutil -q stat "gs://$GS_BUCKET/build_config" &> /dev/null
@@ -1158,7 +1185,9 @@ elif test -n "$RUN_ON_KCS"; then
gsutil cp "$DIR/../kernel-build/kernel-configs/x86_64-config-5.4" "gs://$GS_BUCKET/build_config"
fi
. "$DIR/util/gce-kcs-funcs"
- send_to_kcs $ORIG_CMDLINE_B64
+ if ! send_to_kcs $ORIG_CMDLINE_B64; then
+ exit 1
+ fi
exit 0
fi
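
The two behaviors added above can be exercised as follows (a sketch;
instance name and remote command are illustrative, not output from a
real run):

    # pass a command through to gcloud compute ssh
    gce-xfstests ssh --user root xfstests-test-vm -- uname -a

    # delete test VMs and wait for the deletions instead of backgrounding them
    gce-xfstests abort --wait xfstests-test-vm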
diff --git a/run-fstests/util/gce-ltm-funcs b/run-fstests/util/gce-ltm-funcs
index 4da3e863..ac7b5a29 100644
--- a/run-fstests/util/gce-ltm-funcs
+++ b/run-fstests/util/gce-ltm-funcs
@@ -32,7 +32,14 @@ function send_to_ltm() {
local cmd_to_send=$1
shift
- if test ! -f "$DIR/.ltm_cookie_$GCE_PROJECT"; then
+ # Failed login will create an empty cookie file, so ensure
+ # the file exists and contains a cookie - sometimes ltm_post_json
+ # will succeed even when login fails, so we cannot simply remove
+ # the cookie file upon ltm_post_json failure
+ if test ! -f "$DIR/.ltm_cookie_$GCE_PROJECT" || \
+ ! grep "a.$GCE_PROJECT.gce-xfstests" "$DIR/.ltm_cookie_$GCE_PROJECT" &> /dev/null
+ then
+ echo "login attempt " >> /tmp/ltm-auto-resume.debug
# just create a new login session and store it in the cookie
ltm_post_json -c $DIR/.ltm_cookie_$GCE_PROJECT -d "{\"password\":\"$GCE_LTM_PWD\"}" \
"https://$LTM_HOSTNAME/login"
@@ -97,6 +104,9 @@ function send_to_ltm() {
if [ -n "$ARCH" ]; then
LTM_OPTS="${LTM_OPTS:+$LTM_OPTS, }\"arch\":\"$ARCH\""
fi
+ if [ -n "$MONITOR_TIMEOUT" ]; then
+ LTM_OPTS="${LTM_OPTS:+$LTM_OPTS, }\"monitor_timeout\":\"$MONITOR_TIMEOUT\""
+ fi
if [ -n "$LTM_OPTS" ]; then
LTM_OPTS="\"options\": {$LTM_OPTS}"
fi
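
With --monitor-timeout in use, the fragment assembled above ends up in
the JSON request that send_to_ltm posts to the LTM; a sketch of the
resulting options object (other fields elided):

    "options": {"monitor_timeout":"30m"}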
diff --git a/run-fstests/util/get-config b/run-fstests/util/get-config
index 310f6c9b..52c86ef2 100644
--- a/run-fstests/util/get-config
+++ b/run-fstests/util/get-config
@@ -26,8 +26,14 @@ export KBUILD_DIR="$(dirname $DIR)/kernel-build"
# Source custom configs in ~/.config/ if present
[ -f "$HOME/.config/xfstests-common" ] && . "$HOME/.config/xfstests-common"
-[ -f "$HOME/.config/${XFSTESTS_FLAVOR}-xfstests" ] && \
- . "$HOME/.config/${XFSTESTS_FLAVOR}-xfstests"
+
+# If XFSTESTS_CONFIG is set, use that
+# otherwise, look for config in default location ~/.config/
+if [ -n "$XFSTESTS_CONFIG" -a -f "$XFSTESTS_CONFIG" ]; then
+ . "$XFSTESTS_CONFIG"
+elif [ -f "$HOME/.config/${XFSTESTS_FLAVOR}-xfstests" ]; then
+ . "$HOME/.config/${XFSTESTS_FLAVOR}-xfstests"
+fi
# For gce-xfstests, source the config for the active account if present
if test "$XFSTESTS_FLAVOR" = "gce" -a -z "$GCE_ACCOUNT" -a \
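
A sketch of the new override (path and test invocation are illustrative):

    # use an alternate config instead of ~/.config/gce-xfstests
    XFSTESTS_CONFIG=$HOME/alt-configs/gce-xfstests gce-xfstests smoke

Note that the override only takes effect if the file actually exists;
otherwise the default location in ~/.config/ is still consulted.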
diff --git a/run-fstests/util/parse_cli b/run-fstests/util/parse_cli
index 7b66fb32..efad590e 100644
--- a/run-fstests/util/parse_cli
+++ b/run-fstests/util/parse_cli
@@ -87,6 +87,11 @@ print_help ()
echo " - Don't shard test VMs into other GCE zones"
echo " --bucket-subdir - Use the next argument as a bucket subdir"
fi
+ if flavor_in gce ; then
+ echo " --monitor-timeout time - LTM option to reboot test VM if no"
+ echo " status update after specified time. Accepted time"
+ echo " suffixes include \"h\", \"m\", \"s\"."
+ fi
echo ""
echo "Common file system configurations are:"
echo " 4k 1k ext3 nojournal ext3conv metacsum dioread_nolock "
@@ -119,7 +124,7 @@ validate_test_name()
if test -z "$DO_BLKTESTS" ; then
case "$1" in
btrfs*|ceph*|cifs*|ext4*|f2fs*|generic*|nfs*) ;;
- ocfs2*|overlay*|perf*|shared*|udf*|xfs*) ;;
+ ocfs2*|overlay*|perf*|shared*|udf*|xfs*|selftest*) ;;
*)
echo -e "Invalid xfstests test name: $1\n"
print_help
@@ -269,6 +274,7 @@ local-ssd-nvme
log
machtype:
modules:
+monitor-timeout:
nfssrv:
note:
no-action
@@ -759,6 +765,9 @@ while (( $# >= 1 )); do
--skip-kernel-arch-probe)
SKIP_KERNEL_ARCH_PROBE=YES
;;
+ --monitor-timeout) shift
+ MONITOR_TIMEOUT="$1"
+ ;;
--)
shift
break
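
A hypothetical invocation of the new option (config and test selection
are illustrative):

    # ask the LTM to reboot a test VM that posts no status for 30 minutes
    gce-xfstests ltm --monitor-timeout 30m -c ext4/4k -g auto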
diff --git a/test-appliance/files/root/runtests.sh b/test-appliance/files/root/runtests.sh
index c4ddb739..d2a0e6ef 100755
--- a/test-appliance/files/root/runtests.sh
+++ b/test-appliance/files/root/runtests.sh
@@ -36,6 +36,8 @@ function copy_xunit_results()
fi
rm "$RESULT"
fi
+
+ /root/xfstests/bin/syncfs $RESULT_BASE
}
# check to see if a device is assigned to be used
@@ -303,6 +305,7 @@ else
fi
touch "$RESULTS/fstest-completed"
+rm -f /run/last_logged
./check --help > /tmp/check-help
report_fmt=xunit
@@ -607,13 +610,33 @@ do
show_mount_opts
fi
gce_run_hooks fs-config-begin $TC
- for j in $(seq 1 $RPT_COUNT) ; do
+ RPT_START=1
+ if test -f "$RESULT_BASE/rpt_status"; then
+ RPT_START=$(cat "$RESULT_BASE/rpt_status" | sed 's:/.*::g')
+ fi
+ for j in $(seq $RPT_START $RPT_COUNT) ; do
+ echo "$j/$RPT_COUNT" > "$RESULT_BASE/rpt_status"
+ /root/xfstests/bin/syncfs "$RESULT_BASE"
gce_run_hooks pre-xfstests $TC $j
if test -n "$RUN_ONCE" ; then
if test -f "$RESULT_BASE/completed"
then
- head -n -2 "$RESULT_BASE/completed" > /tmp/completed
- mv /tmp/completed "$RESULT_BASE/completed"
+ last_test="$(tail -n 1 "$RESULT_BASE/completed")"
+
+ if test -f "$RESULT_BASE/results.xml"; then
+ add_error_xunit "$RESULT_BASE/results.xml" "$last_test" "xfstests.global"
+ else
+			# if the first test crashes, make sure results.xml gets
+			# set up correctly via copy_xunit_results
+ add_error_xunit "$RESULT_BASE/result.xml" "$last_test" "xfstests.global"
+ copy_xunit_results
+ fi
+ /root/xfstests/bin/syncfs $RESULT_BASE
+
+ # this was part of the in-progress preemption work,
+ # removing for now as it conflicts with the crash recovery stuff
+ # head -n -2 "$RESULT_BASE/completed" > /tmp/completed
+ # mv /tmp/completed "$RESULT_BASE/completed"
else
touch "$RESULT_BASE/completed"
fi
@@ -627,7 +650,7 @@ do
then
echo ./check -R $report_fmt $fail_test_loop -T $EXTRA_OPT \
$AEX $TEST_SET_EXCLUDE $(cat /tmp/tests-to-run) \
- > "$RESULT_BASE/check-cmd"
+ >> "$RESULT_BASE/check-cmd"
bash ./check -R $report_fmt $fail_test_loop -T $EXTRA_OPT \
$AEX $TEST_SET_EXCLUDE $(cat /tmp/tests-to-run)
copy_xunit_results
@@ -642,6 +665,7 @@ do
fi
rm -f "$RESULT_BASE/completed"
done
+ rm -f "$RESULT_BASE/rpt_status"
if test -n "$RUN_ON_GCE"
then
gsutil cp "gs://$GS_BUCKET/check-time.tar.gz" /tmp >& /dev/null
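
To make the crash-recovery path above concrete, here is a hypothetical
$RESULT_BASE/completed after the appliance rebooted mid-run:

    generic/001
    generic/002
    generic/013

The last line is the test that was running when the VM went down; it is
the one recorded via add_error_xunit, while rpt_status (e.g. "2/4")
lets the loop resume at the interrupted iteration instead of restarting
from 1.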
diff --git a/test-appliance/files/usr/lib/python3/dist-packages/diff_stats.py b/test-appliance/files/usr/lib/python3/dist-packages/diff_stats.py
new file mode 100644
index 00000000..7cd218ea
--- /dev/null
+++ b/test-appliance/files/usr/lib/python3/dist-packages/diff_stats.py
@@ -0,0 +1,105 @@
+#!/usr/bin/python3
+
+import argparse
+import sys
+from gen_results_summary import TestStats
+import xml.etree.ElementTree as ET
+from junitparser import JUnitXml, Property, Properties, Failure, Error, Skipped
+
+
+# s[cfg] = cfg_stats
+# cfg_stats[test] = TestStats()
+# consider s1 the baseline
+def diff_stats(s1, s2, threshold, output_file, input_file1, input_file2):
+ """Compare the statistics between two Stats, report regressions and unexpected results"""
+ print(f"Writing results to {output_file}")
+
+ skip_str=""
+ error_str=""
+ file = open(output_file, 'w')
+ file.write(f'Regression check {input_file1} -> {input_file2}:\n\n')
+ for cfg in s1.keys():
+ if cfg not in s2.keys():
+ file.write(f'***Warning: missing config {cfg} in {input_file2}***\n')
+
+ for cfg in s2.keys():
+ file.write(f'{cfg:-^45}\n')
+ if cfg not in s1.keys():
+ file.write(f'***Warning: missing config {cfg} in {input_file1}***\n')
+ continue
+ for test_name in s2[cfg]:
+ test = s2[cfg][test_name]
+ if test_name not in s1[cfg]:
+ file.write(f'***Warning: {cfg}:{test_name} run on {input_file2} but not on {input_file1}***\n')
+ continue
+ if test.failed > 0:
+ test_1 = s1[cfg][test_name]
+ fail_rate_1 = 100.0 * test_1.failed / test_1.total
+ fail_rate_2 = 100.0 * test.failed / test.total
+ if fail_rate_2 >= fail_rate_1 + threshold:
+ file.write(f'{test_name}: {test_1.failed}/{test_1.total} ({fail_rate_1:.2f}%) -> {test.failed}/{test.total} ({fail_rate_2:.2f}%)\n')
+
+ test_1 = s1[cfg][test_name]
+ skip_rate_1 = 100.0 * test_1.skipped / test_1.total
+ skip_rate_2 = 100.0 * test.skipped / test.total
+ if skip_rate_1 != skip_rate_2:
+ skip_str+=f'{cfg}:{test_name} skip rate changed {test_1.skipped}/{test_1.total} ({skip_rate_1:.2f}%) -> {test.skipped}/{test.total} ({skip_rate_2:.2f}%)\n'
+
+ if test.error > 0:
+ test_1 = s1[cfg][test_name]
+ error_rate_1 = 100.0 * test_1.error / test_1.total
+ error_rate_2 = 100.0 * test.error / test.total
+ # always print error stats
+ error_str+=f'{cfg}:{test_name} ERROR {test_1.error}/{test_1.total} ({error_rate_1:.2f})% -> {test.error}/{test.total} ({error_rate_2:.2f}%)\n'
+ file.write('\n')
+
+ if len(error_str) > 0:
+ file.write('\n*** ERROR(S) occurred in new test set: ***\n')
+ file.write(error_str)
+
+ if len(skip_str) > 0:
+ file.write('\n*** WARNING: skip rate changed between test sets: ***\n')
+ file.write(skip_str)
+ file.close()
+
+
+def read_stats(input_file):
+ """Read test statistics from file"""
+ stats = {}
+ tree = ET.parse(input_file)
+ root = tree.getroot()
+
+ for cfg_element in root.findall('config'):
+ cfg = cfg_element.get('name')
+ if cfg not in stats:
+ stats[cfg] = {}
+ for test_element in cfg_element.findall('test'):
+ test = TestStats()
+
+ name = test_element.get('name')
+ test.failed = int(test_element.get('failed'))
+ test.skipped = int(test_element.get('skipped'))
+ test.error = int(test_element.get('error'))
+ test.total = int(test_element.get('total'))
+
+ stats[cfg][name] = test
+
+ return stats
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('stats_file1', help='First stats file (baseline)', type=str)
+ parser.add_argument('stats_file2', help='Second stats file (file to compare to baseline)', type=str)
+ parser.add_argument('--outfile', help='Diff output file', default="stats.diff", type=str)
+ parser.add_argument('--regression_threshold', help='Percent (int) increase needed in fail rate to determine regression', type=int, default=5)
+ args = parser.parse_args()
+
+ stats1 = read_stats(args.stats_file1)
+ stats2 = read_stats(args.stats_file2)
+
+ diff_stats(stats1, stats2, args.regression_threshold, args.outfile, args.stats_file1, args.stats_file2)
+
+
+if __name__ == "__main__":
+ main()
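
Example invocation (file names are illustrative):

    # flag tests whose failure rate grew by at least 10 percentage points
    ./diff_stats.py baseline-stats.xml new-stats.xml \
        --outfile stats.diff --regression_threshold 10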
diff --git a/test-appliance/files/usr/lib/python3/dist-packages/gen_results_summary.py b/test-appliance/files/usr/lib/python3/dist-packages/gen_results_summary.py
index 44fb07d2..fe37e64d 100644
--- a/test-appliance/files/usr/lib/python3/dist-packages/gen_results_summary.py
+++ b/test-appliance/files/usr/lib/python3/dist-packages/gen_results_summary.py
@@ -135,6 +135,35 @@ def sum_testsuites(testsuites):
errors += testsuite.errors
return (tests, skipped, failures, errors, runtime)
+def get_testsuite_stats(testsuite):
+ """Aggregate stats on individual tests"""
+ Stats = {}
+ for test_case in testsuite:
+ isFail = False
+ isSkipped = False
+ isError = False
+ for entry in test_case.result:
+ if isinstance(entry, Failure):
+ isFail = True
+ if isinstance(entry, Skipped):
+ isSkipped = True
+ if isinstance(entry, Error):
+ isError = True
+ if test_case.name in Stats:
+ s = Stats[test_case.name]
+ else:
+ s = TestStats()
+ Stats[test_case.name] = s
+ s.total += 1
+ if isFail:
+ s.failed += 1
+ if isSkipped:
+ s.skipped += 1
+ if isError:
+ s.error += 1
+
+ return Stats
+
def print_summary(out_f, testsuite, verbose):
"""Print a summary for a particular test suite
@@ -179,30 +208,7 @@ def print_summary(out_f, testsuite, verbose):
out_f.write(" %-12s %-8s %ds\n" %
(test_case.name, status, test_case.time))
else:
- Stats = {}
- for test_case in testsuite:
- isFail = False
- isSkipped = False
- isError = False
- for entry in test_case.result:
- if isinstance(entry, Failure):
- isFail = True
- if isinstance(entry, Skipped):
- isSkipped = True
- if isinstance(entry, Error):
- isError = True
- if test_case.name in Stats:
- s = Stats[test_case.name]
- else:
- s = TestStats()
- Stats[test_case.name] = s
- s.total += 1
- if isFail:
- s.failed += 1
- if isSkipped:
- s.skipped += 1
- if isError:
- s.error += 1
+ Stats = get_testsuite_stats(testsuite)
wp = wrapped_print(out_f, 'Failures', ' ')
for t in Stats:
diff --git a/test-appliance/files/usr/lib/python3/dist-packages/get_stats.py b/test-appliance/files/usr/lib/python3/dist-packages/get_stats.py
new file mode 100644
index 00000000..4cd62815
--- /dev/null
+++ b/test-appliance/files/usr/lib/python3/dist-packages/get_stats.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python3
+
+import argparse
+import sys
+from gen_results_summary import get_property, get_testsuite_stats, get_results
+from junitparser import JUnitXml, Property, Properties, Failure, Error, Skipped
+
+try:
+ from lxml import etree
+except ImportError:
+ from xml.etree import ElementTree as etree
+
+
+# reports is list of results from each xml file
+# stats[cfg] = cfg_stats
+# cfg_stats[test] = TestStats()
+def get_stats_from_dir(results_dir):
+ """From a results dir, return a list of reports and test statistics"""
+ reports = []
+ stats = {}
+ for filename in get_results(results_dir):
+ reports.append(JUnitXml.fromfile(filename))
+
+ if len(reports) == 0:
+ sys.stderr.write(f'Error: could not find any reports in {results_dir}')
+ return None
+
+ for testsuite in reports:
+ cfg = get_property(testsuite.properties(), 'TESTCFG') or get_property(testsuite.properties(), 'FSTESTCFG')
+ if cfg in stats:
+ sys.stderr.write(f'Found duplicate config {cfg}')
+ return None
+ stats[cfg] = get_testsuite_stats(testsuite)
+
+ return stats
+
+# writes all configs into a single output file,
+# condensing them into entries of test->(failed, skipped, error, total).
+# This lets us store stats and easily merge in stats from other runs
+# without having to reprocess everything.
+def write_stats(s, output_file):
+ """Write the test statistics to a file"""
+ root = etree.Element("configs")
+ for cfg in s:
+ cfg_element = etree.SubElement(root, "config", name=cfg)
+ for test_name in s[cfg]:
+ test = s[cfg][test_name]
+ etree.SubElement(cfg_element, "test", name=test_name, failed=str(test.failed), skipped=str(test.skipped), error=str(test.error), total=str(test.total))
+
+ tree = etree.ElementTree(root)
+ etree.indent(tree, space="\t", level=0)
+ tree.write(output_file, encoding='utf-8')
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('results_dir', help='Results directory to process', type=str)
+ parser.add_argument('--outfile', help='Diff output file', default='./stats.xml', type=str)
+ args = parser.parse_args()
+
+ stats = get_stats_from_dir(args.results_dir)
+
+ if stats == None:
+ return -1
+
+ write_stats(stats, args.outfile)
+
+if __name__ == "__main__":
+ main()
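
A sketch of the stats.xml that write_stats() emits (config, test name,
and counts are illustrative):

    <configs>
        <config name="ext4/4k">
            <test name="generic/475" failed="2" skipped="0" error="0" total="10"/>
        </config>
    </configs>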
diff --git a/test-appliance/files/usr/lib/python3/dist-packages/junitparser/__init__.py b/test-appliance/files/usr/lib/python3/dist-packages/junitparser/__init__.py
index e3d8da0c..55b9ddbc 100644
--- a/test-appliance/files/usr/lib/python3/dist-packages/junitparser/__init__.py
+++ b/test-appliance/files/usr/lib/python3/dist-packages/junitparser/__init__.py
@@ -8,6 +8,7 @@ from .junitparser import (
Skipped,
Failure,
Error,
+ Result,
TestCase,
Properties,
IntAttr,
diff --git a/test-appliance/files/usr/lib/python3/dist-packages/junitparser/junitparser.py b/test-appliance/files/usr/lib/python3/dist-packages/junitparser/junitparser.py
index eb38b298..b3bbd853 100644
--- a/test-appliance/files/usr/lib/python3/dist-packages/junitparser/junitparser.py
+++ b/test-appliance/files/usr/lib/python3/dist-packages/junitparser/junitparser.py
@@ -310,7 +310,11 @@ class JUnitXml(Element):
if parse_func:
tree = parse_func(filepath)
else:
- tree = etree.parse(filepath) # nosec
+ try:
+ tree = etree.parse(filepath) # nosec
+ except etree.XMLSyntaxError:
+ p = etree.XMLParser(huge_tree=True)
+ tree = etree.parse(filepath, parser=p)
root_elem = tree.getroot()
if root_elem.tag == "testsuites":
instance = cls()
diff --git a/test-appliance/files/usr/lib/python3/dist-packages/merge_stats.py b/test-appliance/files/usr/lib/python3/dist-packages/merge_stats.py
new file mode 100644
index 00000000..a3148142
--- /dev/null
+++ b/test-appliance/files/usr/lib/python3/dist-packages/merge_stats.py
@@ -0,0 +1,45 @@
+#!/usr/bin/python3
+
+import argparse
+import sys
+import xml.etree.ElementTree as ET
+import get_stats
+import diff_stats
+from gen_results_summary import TestStats
+from junitparser import JUnitXml, Property, Properties, Failure, Error, Skipped
+
+
+def merge_stats(stats1, stats2):
+ """Merges stats2 into stats1"""
+ for cfg in stats2:
+ if cfg not in stats1:
+ stats1[cfg] = {}
+
+ for test_name in stats2[cfg]:
+ if test_name not in stats1[cfg]:
+ stats1[cfg][test_name] = TestStats()
+ stats1[cfg][test_name].failed += stats2[cfg][test_name].failed
+ stats1[cfg][test_name].skipped += stats2[cfg][test_name].skipped
+ stats1[cfg][test_name].error += stats2[cfg][test_name].error
+ stats1[cfg][test_name].total += stats2[cfg][test_name].total
+
+ return stats1
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('stats_file', help='First stats file', type=str)
+ parser.add_argument('stats_files_merge', nargs='+', help='List of stats files to merge', type=str)
+ parser.add_argument('--outfile', default='merged_stats.xml', help='Output xml file', type=str)
+ args = parser.parse_args()
+
+ stats = diff_stats.read_stats(args.stats_file)
+
+ for file in args.stats_files_merge:
+ stats_merge = diff_stats.read_stats(file)
+ stats = merge_stats(stats, stats_merge)
+
+ get_stats.write_stats(stats, args.outfile)
+
+
+if __name__ == "__main__":
+ main()
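
Putting the three tools together (paths are illustrative):

    ./get_stats.py results-run1/ --outfile run1.xml
    ./get_stats.py results-run2/ --outfile run2.xml
    ./merge_stats.py run1.xml run2.xml --outfile merged.xml
    ./diff_stats.py baseline.xml merged.xml --outfile stats.diff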
diff --git a/test-appliance/files/usr/local/bin/add_error_xunit b/test-appliance/files/usr/local/bin/add_error_xunit
new file mode 100755
index 00000000..0f12e983
--- /dev/null
+++ b/test-appliance/files/usr/local/bin/add_error_xunit
@@ -0,0 +1,48 @@
+#!/usr/bin/python3
+import argparse
+import os
+import sys
+from junitparser import JUnitXml, TestSuite, TestCase, Result, Error
+
+def get_test_suite(filename):
+ if not os.path.exists(filename):
+ ts = TestSuite()
+ else:
+ try:
+ ts = JUnitXml.fromfile(filename)
+ except IOError as e:
+            sys.exit("Couldn't open %s: %s" % (filename, e.strerror))
+
+ if type(ts) != TestSuite:
+        sys.exit('%s is not an xUnit report file' % filename)
+ return ts
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('input_file', help='input xUnit result file')
+parser.add_argument('testname', help='name of test causing error')
+parser.add_argument('classname', help='classname for test case')
+args = parser.parse_args()
+
+ts = get_test_suite(args.input_file)
+
+result = Result()
+
+error = Error(result)
+error.message='Machine rebooted (crash or test timeout)'
+error.type='TestFail'
+
+tc = TestCase()
+tc.classname=args.classname
+tc.name=args.testname
+tc.time = 0
+tc.result = [error]
+
+# this also updates the statistics
+ts.add_testcase(tc)
+
+ts.write(args.input_file + '.new', pretty=True)
+if os.path.exists(args.input_file):
+ os.rename(args.input_file, args.input_file + '.error.bak')
+os.rename(args.input_file + '.new' , args.input_file)
+
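
Example use, matching the runtests.sh hunk earlier in this merge (paths
are illustrative):

    add_error_xunit /results/ext4/results-4k/results.xml generic/013 xfstests.global

This appends a testcase carrying an <error> element to the report and
rewrites the file in place, leaving an .error.bak copy of the original.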
diff --git a/test-appliance/files/usr/local/lib/gce-add-metadata b/test-appliance/files/usr/local/lib/gce-add-metadata
index 672975b1..d77f50f1 100755
--- a/test-appliance/files/usr/local/lib/gce-add-metadata
+++ b/test-appliance/files/usr/local/lib/gce-add-metadata
@@ -7,4 +7,4 @@ then
fi
flock /run/xattr.lock gcloud compute instances -q add-metadata \
- --zone $ZONE $(hostname) --metadata "$@" >& /dev/null
+ --zone $ZONE $(hostname) --metadata "^##^$@" >& /dev/null
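
gcloud splits list-valued flags such as --metadata on commas, so a
status message containing a comma would be parsed as multiple key=value
pairs. The "^##^" prefix uses gcloud's alternate-delimiter escaping
syntax (see "gcloud topic escaping") to switch the separator to "##",
e.g. (instance name illustrative):

    gcloud compute instances add-metadata test-vm \
        --metadata "^##^status=12:30 running generic/001, generic/002"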
diff --git a/test-appliance/files/usr/local/lib/gce-logger b/test-appliance/files/usr/local/lib/gce-logger
index 68a1970e..4eadca31 100755
--- a/test-appliance/files/usr/local/lib/gce-logger
+++ b/test-appliance/files/usr/local/lib/gce-logger
@@ -6,10 +6,31 @@ then
run_hooks logger "$*"
fi
+is_test=
status=$(echo "$*" | sed -e 's/^run xfstest //')
if echo "$*" | grep -q "^run xfstest "
then
+ is_test="y"
echo "$status" >> $RESULT_BASE/completed
+
+ if test ! -f $RESULT_BASE/rpt_status -o \
+ ! -f $RESULT_BASE/tests-to-run -o \
+ ! -f $RESULT_BASE/completed
+ then
+ status="--% $status"
+ else
+ rpt_status=$(cat $RESULT_BASE/rpt_status)
+ current_rpt=${rpt_status%%/*}
+ total_rpt=${rpt_status##*/}
+ total_tests=$(cat $RESULT_BASE/tests-to-run | sort | uniq | wc -l)
+ count_completed=$(cat $RESULT_BASE/completed | sort | uniq | wc -l)
+
+ progress=$(( ( ( $current_rpt - 1 ) * $total_tests + $count_completed ) * 100 / ( $total_tests * $total_rpt ) ))
+ status="$progress% $status"
+ fi
+
+ # sync completed file
+ /root/xfstests/bin/syncfs $RESULT_BASE
fi
if test -f /run/fstest-config
@@ -18,7 +39,11 @@ then
status="$cfg $status"
fi
-if test -z "$(find /run/last_logged -mmin -1 -print 2> /dev/null)"
+# force first test to upload its test status and wait for it to finish
+if test -n "$is_test" -a ! -s /run/last_logged; then
+ /usr/local/lib/gce-add-metadata "status=$(date +%H:%M) $status"
+ echo "Started testing" > /run/last_logged
+elif test -z "$(find /run/last_logged -mmin -1 -print 2> /dev/null)"
then
/usr/local/lib/gce-add-metadata "status=$(date +%H:%M) $status" &
touch /run/last_logged
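
A worked example of the progress arithmetic above: with rpt_status of
"2/4", 300 unique tests to run, and 75 completed so far in this pass,

    progress = ((2 - 1) * 300 + 75) * 100 / (300 * 4) = 37500 / 1200 = 31

so the status line is prefixed with "31%" (shell integer division
always rounds down).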
diff --git a/test-appliance/files/usr/local/lib/gce-server/ltm/shard.go b/test-appliance/files/usr/local/lib/gce-server/ltm/shard.go
index 25af29cd..bef49236 100644
--- a/test-appliance/files/usr/local/lib/gce-server/ltm/shard.go
+++ b/test-appliance/files/usr/local/lib/gce-server/ltm/shard.go
@@ -38,6 +38,7 @@ type ShardWorker struct {
vmStatus string
vmtestStart time.Time
testResult server.ResultType
+ vmReset bool
log *logrus.Entry
logPath string
@@ -48,9 +49,9 @@ type ShardWorker struct {
}
const (
- monitorTimeout = 1 * time.Hour
- noStatusTimeout = 5 * time.Minute
+ noStatusTimeout = 10 * time.Minute
monitorInterval = 60 * time.Second
+ resetTimeout = 10 * time.Minute
gsInterval = 10 * time.Second
maxAttempts = 5
)
@@ -70,6 +71,7 @@ func NewShardWorker(sharder *ShardScheduler, shardID string, config string, zone
vmStatus: "waiting for launch",
vmtestStart: time.Now(),
testResult: server.DefaultResult,
+ vmReset: false,
log: sharder.log.WithField("shardID", shardID),
logPath: logPath,
@@ -91,23 +93,25 @@ func NewShardWorker(sharder *ShardScheduler, shardID string, config string, zone
"--no-email",
"-c", config,
}
+
if sharder.arch != "" {
shard.args = append(shard.args, "--arch", sharder.arch)
}
- shard.args = append(shard.args, sharder.validArgs...)
- var defaultProj bool = true
- for _, arg := range shard.args {
+ var imgProjFlag bool = false
+ for _, arg := range sharder.validArgs {
if arg == "--image-project" {
- defaultProj = false
+ imgProjFlag = true
break
}
}
- if defaultProj {
- shard.args = append(shard.args, "--image-project", sharder.projID)
+ if ! imgProjFlag && len(sharder.imgProjID) > 0 {
+ shard.args = append(shard.args, "--image-project", sharder.imgProjID)
}
+ shard.args = append(shard.args, sharder.validArgs...)
+
return &shard
}
@@ -185,6 +189,7 @@ func (shard *ShardWorker) monitor() {
if *metaData.Value != shard.vmStatus {
shard.vmStatus = *metaData.Value
shard.vmtestStart = time.Now()
+ shard.vmReset = false
break
}
}
@@ -203,20 +208,30 @@ func (shard *ShardWorker) monitor() {
log.Debug("waiting to get test status metadata")
}
- if time.Since(shard.vmtestStart) > monitorTimeout {
- if !shard.sharder.keepDeadVM {
- shard.shutdownOnTimeout(instanceInfo.Metadata)
- }
- shard.vmStatus = "timeout on one test"
- shard.testResult = server.Hang
- log.WithFields(logrus.Fields{
- "status": shard.vmStatus,
- "start": shard.vmtestStart.Format(time.Stamp),
- }).Errorf("Instance seems to have wedged, no status update for %s", monitorTimeout.Round(time.Minute))
+ if shard.vmReset && time.Since(shard.vmtestStart) > resetTimeout {
+ log.Errorf("VM did not come back online after reset, exiting");
return
}
+ // Reset VM if we don't get a status update
+ // Skip check if we are already performing a reset
+ // Selftests may limit monitorTimeout to shorter than noStatusTimeout
+ // so skip check if we are still launching
+ if time.Since(shard.vmtestStart) > shard.sharder.monitorTimeout &&
+ ! shard.vmReset && shard.vmStatus != "launching" {
+ log.Debug("Resetting VM")
+ err := shard.sharder.gce.ResetVM(shard.sharder.projID, shard.zone, shard.name)
+ if err != nil {
+ log.Errorf("Failed to reset %s", shard.name)
+ shard.vmStatus = "failed to reset after timeout"
+ shard.testResult = server.Error
+ return
+ }
+ shard.vmReset = true
+ shard.vmtestStart = time.Now()
+ }
+
log.WithFields(logrus.Fields{
"status": shard.vmStatus,
"start": shard.vmtestStart.Format(time.Stamp),
@@ -283,8 +298,7 @@ func (shard *ShardWorker) shutdownOnTimeout(metadata *compute.Metadata) {
/*
finish calls gce-xfstests scripts to fetch and unpack test result files.
-It deletes the results in gs bucket and local serial port output.
-It also determines testResult:
+It deletes the results in gs bucket and determines testResult:
Default VM finishes without issues, test result is found;
Crash VM started running tests but no test result is found;
@@ -321,11 +335,6 @@ func (shard *ShardWorker) finish() {
shard.log.Panic("Failed to find unpacked result files")
}
- if check.FileExists(shard.serialOutputPath) && !shard.vmTimeout {
- err = os.Remove(shard.serialOutputPath)
- check.NoError(err, shard.log, "Failed to remove dir")
- }
-
prefix := fmt.Sprintf("%s/results.%s", shard.sharder.bucketSubdir, shard.resultsName)
_, err = shard.sharder.gce.DeleteFiles(prefix)
check.NoError(err, shard.log, "Failed to delete file")
diff --git a/test-appliance/files/usr/local/lib/gce-server/ltm/sharder.go b/test-appliance/files/usr/local/lib/gce-server/ltm/sharder.go
index 957efa98..95d1f9e6 100644
--- a/test-appliance/files/usr/local/lib/gce-server/ltm/sharder.go
+++ b/test-appliance/files/usr/local/lib/gce-server/ltm/sharder.go
@@ -23,6 +23,7 @@ import (
"sort"
"strings"
"sync"
+ "time"
"thunk.org/gce-server/util/check"
"thunk.org/gce-server/util/email"
@@ -36,12 +37,14 @@ import (
)
const genResultsSummaryPath = "/usr/local/bin/gen_results_summary"
+const defaultMonitorTimeout = 1 * time.Hour
// ShardScheduler schedules tests and aggregates reports.
type ShardScheduler struct {
- testID string
- projID string
- origCmd string
+ testID string
+ projID string
+ imgProjID string
+ origCmd string
zone string
region string
@@ -53,6 +56,7 @@ type ShardScheduler struct {
reportReceiver string
maxShards int
keepDeadVM bool
+ monitorTimeout time.Duration
reportKCS bool
testRequest server.TaskRequest
@@ -101,6 +105,9 @@ func NewShardScheduler(c server.TaskRequest, testID string) *ShardScheduler {
projID, err := gcp.GceConfig.Get("GCE_PROJECT")
check.Panic(err, log, "Failed to get project config")
+ imgProjID, err := gcp.GceConfig.Get("GCE_IMAGE_PROJECT")
+ check.Panic(err, log, "Failed to get image project")
+
gsBucket, err := gcp.GceConfig.Get("GS_BUCKET")
check.Panic(err, log, "Failed to get gs bucket config")
@@ -108,9 +115,10 @@ func NewShardScheduler(c server.TaskRequest, testID string) *ShardScheduler {
log.Info("Initiating test sharder")
sharder := ShardScheduler{
- testID: testID,
- projID: projID,
- origCmd: origCmd,
+ testID: testID,
+ projID: projID,
+ imgProjID: imgProjID,
+ origCmd: origCmd,
zone: zone,
region: region,
@@ -122,6 +130,7 @@ func NewShardScheduler(c server.TaskRequest, testID string) *ShardScheduler {
reportReceiver: c.Options.ReportEmail,
maxShards: 0,
keepDeadVM: false,
+ monitorTimeout: defaultMonitorTimeout,
reportKCS: false,
testRequest: c,
@@ -143,6 +152,15 @@ func NewShardScheduler(c server.TaskRequest, testID string) *ShardScheduler {
if sharder.bucketSubdir == "" {
sharder.bucketSubdir = "results"
}
+ if c.Options.MonitorTimeout != "" {
+ sharder.monitorTimeout, err = time.ParseDuration(c.Options.MonitorTimeout)
+ if err != nil {
+ sharder.monitorTimeout = defaultMonitorTimeout
+ sharder.log.WithField("MonitorTimeout", c.Options.MonitorTimeout).Error("Unable to parse --monitor-timeout option, using default value")
+ } else {
+ sharder.log.WithField("MonitorTimeout", sharder.monitorTimeout).Info("Parsed monitor timeout argument")
+ }
+ }
sharder.validArgs, sharder.configs, err = getConfigs(sharder.origCmd)
check.Panic(err, log, "Failed to parse config from origCmd")
@@ -368,6 +386,7 @@ func (sharder *ShardScheduler) aggResults() {
"unpackedResultsDir": shard.unpackedResultsDir,
})
log.Debug("Moving shard result files into aggregate folder")
+ shardHasResults := false
if check.DirExists(shard.unpackedResultsDir) {
err := os.RemoveAll(sharder.aggDir + shard.shardID)
@@ -376,16 +395,22 @@ func (sharder *ShardScheduler) aggResults() {
err = os.Rename(shard.unpackedResultsDir, sharder.aggDir+shard.shardID)
check.Panic(err, log, "Failed to move dir")
+ shardHasResults = true
hasResults = true
- } else if check.FileExists(shard.serialOutputPath) {
+ }
+
+ if check.FileExists(shard.serialOutputPath) {
err := os.RemoveAll(sharder.aggDir + shard.shardID + ".serial")
check.Panic(err, log, "Failed to remove dir")
err = os.Rename(shard.serialOutputPath, sharder.aggDir+shard.shardID+".serial")
check.Panic(err, log, "Failed to move dir")
+ shardHasResults = true
hasResults = true
- } else {
+ }
+
+ if ! shardHasResults {
log.Warn("Shard has no results available")
}
}
diff --git a/test-appliance/files/usr/local/lib/gce-server/util/gcp/gcp.go b/test-appliance/files/usr/local/lib/gce-server/util/gcp/gcp.go
index 65480d6d..da8d7401 100644
--- a/test-appliance/files/usr/local/lib/gce-server/util/gcp/gcp.go
+++ b/test-appliance/files/usr/local/lib/gce-server/util/gcp/gcp.go
@@ -286,3 +286,10 @@ func NotFound(err error) bool {
}
return false
}
+
+func (gce *Service) ResetVM(project string, zone string, instance string) error {
+ instancesService := compute.NewInstancesService(gce.service)
+ call := instancesService.Reset(project, zone, instance)
+ _, err := call.Do()
+ return err
+}
diff --git a/test-appliance/files/usr/local/lib/gce-server/util/parser/parser.go b/test-appliance/files/usr/local/lib/gce-server/util/parser/parser.go
index 81ff5fa0..045ab86c 100644
--- a/test-appliance/files/usr/local/lib/gce-server/util/parser/parser.go
+++ b/test-appliance/files/usr/local/lib/gce-server/util/parser/parser.go
@@ -40,6 +40,7 @@ var invalidOpts = []string{
"--watch",
"--bisect-good",
"--bisect-bad",
+ "--monitor-timeout",
}
/*
diff --git a/test-appliance/files/usr/local/lib/gce-server/util/server/server.go b/test-appliance/files/usr/local/lib/gce-server/util/server/server.go
index 4c9122e8..d7e3690e 100644
--- a/test-appliance/files/usr/local/lib/gce-server/util/server/server.go
+++ b/test-appliance/files/usr/local/lib/gce-server/util/server/server.go
@@ -116,20 +116,21 @@ const (
// UserOptions contains configs user sends to LTM or KCS.
type UserOptions struct {
- NoRegionShard bool `json:"no_region_shard"`
- BucketSubdir string `json:"bucket_subdir"`
- GsKernel string `json:"gs_kernel"`
- ReportEmail string `json:"report_email"`
- CommitID string `json:"commit_id"`
- GitRepo string `json:"git_repo"`
- BranchName string `json:"branch_name"`
- UnWatch string `json:"unwatch"`
- BadCommit string `json:"bad_commit"`
- GoodCommit string `json:"good_commit"`
- KConfig string `json:"kconfig"`
- KConfigOpts string `json:"kconfig_opts"`
- KbuildOpts string `json:"kbuild_opts"`
- Arch string `json:"arch"`
+ NoRegionShard bool `json:"no_region_shard"`
+ BucketSubdir string `json:"bucket_subdir"`
+ GsKernel string `json:"gs_kernel"`
+ ReportEmail string `json:"report_email"`
+ CommitID string `json:"commit_id"`
+ GitRepo string `json:"git_repo"`
+ BranchName string `json:"branch_name"`
+ UnWatch string `json:"unwatch"`
+ BadCommit string `json:"bad_commit"`
+ GoodCommit string `json:"good_commit"`
+ KConfig string `json:"kconfig"`
+ KConfigOpts string `json:"kconfig_opts"`
+ KbuildOpts string `json:"kbuild_opts"`
+ Arch string `json:"arch"`
+ MonitorTimeout string `json:"monitor_timeout"`
}
// InternalOptions contains configs used by LTM and KCS internally.
diff --git a/test-appliance/gce-xfstests-bld.sh b/test-appliance/gce-xfstests-bld.sh
index 4c54f8ce..2c054d0f 100644
--- a/test-appliance/gce-xfstests-bld.sh
+++ b/test-appliance/gce-xfstests-bld.sh
@@ -372,6 +372,8 @@ sed -i -e '/ExecStart/s/agetty/agetty -a root/' \
-e 's/After=rc.local.service/After=network.target/' \
/etc/systemd/system/telnet-getty@.service
+echo "kernel.panic=60" >> /etc/sysctl.conf
+
systemctl enable kvm-xfstests.service
systemctl enable gce-fetch-gs-files.service
systemctl enable gce-finalize-wait.service
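
The kernel.panic=60 sysctl makes the appliance reboot itself 60 seconds
after a kernel panic; together with the rpt_status/completed bookkeeping
added to runtests.sh above, this is what allows a crashed test VM to
come back up and resume its run. To verify on a running appliance:

    # should print: kernel.panic = 60
    sysctl kernel.panic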