aboutsummaryrefslogtreecommitdiffstats
path: root/tsrc/random_offline
blob: adb68f70a214db196c459139bc045fa41f1c85a3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/bin/bash
# randomly soft offline pages
# random_offline options
# -t seconds   runtime in seconds (default unlimited)
# -m max-pages maximum pages to tie up before unpoisoning
# -s seed      random seed
# Note: running this for too long may still run out of memory
# because unpoison cannot completely undo what soft offline
# does to larger free memory areas (TBD in the kernel)
# Author: Andi Kleen

# fixme: the default seed is the current time, so runs without -s are not reproducible

#mount -t debugfs none /debug

# Defaults; THRESH, SEED and RUNTIME can be overridden with -m, -s and -t.
DEBUG=/sys/kernel/debug	# debugfs mount point
THRESH=1000		# attempts per batch before unpoisoning
RUNTIME=		# empty: no time limit
SEED=			# empty: seed is picked at startup

# Print an error message and abort the script.
# Arguments: the message words (joined with spaces).
# Outputs:   "ERROR: <message>" on stderr.
# Exits with status 1 so callers and test harnesses can detect the failure.
fail() {
	echo "ERROR: $*" >&2
	exit 1
}

# Print the command line help, then abort through fail.
# Arguments: $1 - the offending option, quoted in the error message.
usage() {
	echo "Usage:"
	echo "random_offline options"
	# printf is used because bash's echo does not treat "--" as an
	# end-of-options marker and would print it literally.
	printf '%s\n' \
		"-t seconds   runtime in seconds (default unlimited)" \
		"-m max-pages maximum pages to tie up before unpoisoning" \
		"-s seed      random seed"
	fail "Invalid option $1"
}

# Parse the command line.  The leading ':' selects getopts' silent mode, in
# which OPTARG holds the offending option character, so the error message can
# name it (and nothing unquoted such as '?' is ever glob-expanded).
while getopts ":t:m:s:" option ; do
	case "$option" in
	t)
		# RUNTIME is compared numerically in the main loop
		[[ $OPTARG =~ ^[0-9]+$ ]] || usage "-t $OPTARG (not a number)"
		RUNTIME=$OPTARG
		;;
	m)
		# THRESH is compared numerically in the main loop
		[[ $OPTARG =~ ^[0-9]+$ ]] || usage "-m $OPTARG (not a number)"
		THRESH=$OPTARG
		;;
	s) SEED=$OPTARG ;;
	:) usage "-$OPTARG (missing argument)" ;;
	*) usage "-$OPTARG" ;;
	esac
done

# Environment checks: must be root, and the kernel must expose both the
# soft offline and the hwpoison unpoison interfaces.
if [ "$(whoami)" != root ] ; then
	fail "Not root"
fi

# Try to mount debugfs when the hwpoison directory is not visible yet,
# then check again.
if [ ! -d "$DEBUG/hwpoison" ] ; then
	mount -t debugfs none "$DEBUG"
fi
if [ ! -d "$DEBUG/hwpoison" ] ; then
	fail "No debugfs"
fi

if [ ! -w /sys/devices/system/memory/soft_offline_page ] ; then
	fail "No soft offlining support in kernel"
fi
if [ ! -w "$DEBUG/hwpoison/unpoison-pfn" ] ; then
	fail "no unpoison support in kernel"
fi

# Print the highest page frame number covered by a "System RAM" range.
# Arguments: $1 - firmware memory map directory (optional, defaults to
#                 /sys/firmware/memmap); each entry has "type" and "end" files.
# Outputs:   the end address of the highest System RAM range divided by
#            4096 (the page size assumed throughout this script), in decimal.
#            Prints nothing when no usable entry is found.
end_of_memory() {
	local memmap=${1:-/sys/firmware/memmap}
	local entry kind end

	for entry in "$memmap"/* ; do
		# also skips the literal pattern left behind by an unmatched glob
		[ -r "$entry/type" ] && [ -r "$entry/end" ] || continue

		kind=$(< "$entry/type")
		[ "$kind" = "System RAM" ] || continue

		end=$(< "$entry/end")
		end=${end#0x}
		[[ $end =~ ^[0-9a-fA-F]+$ ]] || continue

		# hex address -> page frame number
		echo $(( 16#$end / 4096 ))
	done | sort -n | tail -n1
}

# E is the upper bound (in pages) for the randomly chosen page frames.
E=$(end_of_memory)

# Without a positive page count the main loop could only generate invalid
# addresses and would spin forever, so give up early.
if ! [[ $E =~ ^[0-9]+$ ]] || [ "$E" -eq 0 ] ; then
	fail "Cannot determine end of memory"
fi

echo "soft offlining pages upto $E" 

# Unpoison every page recorded in the file "offlined" (current directory).
# Globals:  DEBUG (read), utotal and usuccess (incremented).
# Files:    reads ./offlined (one hex address per line); appends
#           "<address> <status>" to ./unpoison-failed for every failed write.
# Outputs:  progress and failure messages on stdout.
# Does nothing when ./offlined does not exist.  The caller removes
# ./offlined afterwards.
unpoison() {
	local addr rc

	if [ ! -f offlined ] ; then
		return
	fi

	echo unpoisioning
	while read -r addr ; do
		(( utotal++ ))
		# unpoison-pfn takes a page frame number: drop the three
		# trailing hex zeros (4096 byte pages) from the address.
		if echo "${addr%000}" > "$DEBUG/hwpoison/unpoison-pfn" ; then
			(( usuccess++ ))
		else
			# capture the status first; every later command overwrites $?
			rc=$?
			echo "$addr $rc" >> unpoison-failed
			echo "unpoisioning $addr failed: $rc"
		fi
	done < offlined
	echo done
	echo
}

# Always give pages back when the script exits.
trap unpoison EXIT

# Fall back to the current time when no seed was supplied.
: "${SEED:=$(date +%s)}"
RANDOM=$SEED
echo "Using random seed $SEED"

# Statistics: soft offline attempts (total/success/failed) and
# unpoison attempts (utotal/usuccess/ufailed).
(( total = success = failed = 0 ))
(( utotal = usuccess = ufailed = 0 ))

# Reference points for the runtime limit and the final report.
start=$(date +%s)
cbefore=$(grep HardwareCorrupted /proc/meminfo)


# Main loop: soft offline random pages; after more than THRESH attempts
# unpoison the current batch and start a new one.  Runs until RUNTIME
# seconds have passed, or forever when RUNTIME is empty.
(( k = 0 ))
rm -f offlined unpoison-failed
while true ; do 
	# RANDOM must be read in this shell: inside $( ) bash reseeds the
	# generator, which would make the -s seed ineffective.  A single
	# RANDOM value is only 15 bits wide, so three are combined to be
	# able to reach every page below E.
	R=$(( (RANDOM << 30) | (RANDOM << 15) | RANDOM ))
	# page frame number -> byte address (4096 byte pages), as 0x<HEX>
	printf -v T '0x%X' "$(( (R % E) * 4096 ))"
	#echo "p $T" 
	(( total++ )) 
	# the write fails for pages that cannot be offlined; that is expected
	if echo "$T" 2>/dev/null >/sys/devices/system/memory/soft_offline_page ; then
		echo "$T" >> offlined
		(( success++ )) 
	else
		#echo offlining $T failed $?
		(( failed++ ))
	fi
	#echo -n . 

	(( k++ )) 
	if [ "$k" -gt "$THRESH" ] ; then
		unpoison
		(( k = 0 ))
		# -f: the file does not exist when nothing was offlined
		rm -f offlined
	fi

	(( DIFF = $(date +%s) - start ))
	# compare only when a limit was given; an empty RUNTIME is not a number
	if [ -n "$RUNTIME" ] && [ "$DIFF" -gt "$RUNTIME" ] ; then
		echo time over
		break
	fi
done	

# Unpoison the last batch before reporting.  Otherwise it would only be
# handled by the exit trap, after the statistics below were printed, and
# would never show up in them.
unpoison
rm -f offlined

# one line per failed unpoison attempt
if [ -f unpoison-failed ] ; then
	ufailed=$(wc -l < unpoison-failed)
fi
echo "soft-poison: success $success failed $failed of total $total"
echo "unpoison-failed: success $usuccess failed $ufailed of total $utotal"
echo "poisoned before: $cbefore"
echo -n "poisoned after: "
grep HardwareCorrupted /proc/meminfo

### xxx automatic success/failure criteria?