1f08c3bdfSopenharmony_ci#!/bin/bash
# Randomly soft-offline pages.
# random_offline options:
# -t seconds   runtime in seconds (default: unlimited)
# -m max-pages maximum number of pages to tie up before unpoisoning
# -s seed      random seed
# Note: running this for too long may still run out of memory,
# because unpoison cannot completely undo what soft offline
# does to larger free memory areas (TBD in the kernel).
# Author: Andi Kleen
11f08c3bdfSopenharmony_ci
12f08c3bdfSopenharmony_ci# fixme: uses time seed, non reproducible
13f08c3bdfSopenharmony_ci
14f08c3bdfSopenharmony_ci#mount -t debugfs none /debug
15f08c3bdfSopenharmony_ci
# Defaults; -m, -s and -t on the command line override the first three.

# Mount point of debugfs, which holds the hwpoison control files.
DEBUG=/sys/kernel/debug
# Number of offline attempts between two unpoison passes (-m).
THRESH=1000
# Random seed (-s); left empty here, a time-based seed is chosen later.
SEED=
# Run time limit in seconds (-t); empty means no limit.
RUNTIME=
20f08c3bdfSopenharmony_ci
# Report a fatal error and terminate the script.
# Arguments: the message; all words are joined with single spaces.
# Outputs:   "ERROR: <message>" on stderr.
# Exits with status 1 so that callers and test harnesses can tell the
# run did not succeed (the previous exit status of 0 hid every error).
fail() {
	echo "ERROR: $*" >&2
	exit 1
}
25f08c3bdfSopenharmony_ci
# Print the option summary, then abort through fail().
# Arguments: $1 - the offending option, quoted in the final error line.
# Does not return: fail() exits the script.
usage() {
	echo "Usage:"
	echo "random_offline options"
	# printf instead of "echo --": bash's echo builtin does not treat
	# "--" as an end-of-options marker and would print it literally
	# in front of every help line.
	printf '%s\n' \
		"-t seconds   runtime in seconds (default unlimited)" \
		"-m max-pages maximum pages to tie up before unpoisoning" \
		"-s seed      random seed"
	fail "Invalid option $1"
}
34f08c3bdfSopenharmony_ci
# Parse the command line.
# The leading ':' in the option string selects getopts' silent mode: for
# an unknown option "option" is '?' and for a missing argument it is ':',
# and in both cases the offending option letter is left in OPTARG.  That
# lets usage() name the real option instead of a bare, unquoted '?'
# (which the shell would additionally glob-expand against the current
# directory).
while getopts ":t:m:s:" option ; do
	case "$option" in
	t) RUNTIME=$OPTARG ;;
	m) THRESH=$OPTARG ;;
	s) SEED=$OPTARG ;;
	:) usage "-$OPTARG (missing argument)" ;;
	*) usage "-$OPTARG" ;;
	esac
done
43f08c3bdfSopenharmony_ci
# Preconditions: give up early through fail() when the script cannot work.

if [ "$(whoami)" != root ] ; then
	fail "Not root"
fi

# The hwpoison control directory lives in debugfs; try to mount debugfs
# when the directory is missing, then check again.
if [ ! -d "$DEBUG/hwpoison" ] ; then
	mount -t debugfs none "$DEBUG"
fi
if [ ! -d "$DEBUG/hwpoison" ] ; then
	fail "No debugfs"
fi

# Both control files must be writable: one to offline pages, one to
# give them back.
if [ ! -w /sys/devices/system/memory/soft_offline_page ] ; then
	fail "No soft offlining support in kernel"
fi
if [ ! -w "$DEBUG/hwpoison/unpoison-pfn" ] ; then
	fail "no unpoison support in kernel"
fi
# Print the highest page frame number (decimal) reached by any
# "System RAM" range of the firmware memory map.
# Arguments: $1 - memory map directory (optional, default
#                 /sys/firmware/memmap); each entry is a directory with
#                 a "type" file and an "end" file holding a hexadecimal
#                 byte address.
# Outputs:   the page frame number on stdout; nothing when no System RAM
#            entry could be read.
# Note: the arithmetic is done by bash (signed 64 bit), no bc needed.
end_of_memory() {
	local memmap=${1:-/sys/firmware/memmap}
	local entry kind last pfn highest=""

	for entry in "$memmap"/* ; do
		# An unmatched glob is left as a literal pattern, so make sure
		# the entry really has the two files before reading them.
		[ -r "$entry/type" ] && [ -r "$entry/end" ] || continue

		kind=$(< "$entry/type")
		[ "$kind" = "System RAM" ] || continue

		last=$(< "$entry/end")
		last=${last#0x}
		# Only plain hex digits may reach the arithmetic expansion.
		[[ $last =~ ^[0-9a-fA-F]+$ ]] || continue

		# Byte address -> page frame number (4096 byte pages).
		pfn=$(( 16#$last / 4096 ))
		if [ -z "$highest" ] || (( pfn > highest )) ; then
			highest=$pfn
		fi
	done

	if [ -n "$highest" ] ; then
		echo "$highest"
	fi
}
64f08c3bdfSopenharmony_ci
# Highest page frame number backed by System RAM; the main loop picks
# random pages modulo this value.
E=$(end_of_memory)

# Stop here when no usable bound was found: with an empty, non-numeric
# or zero E every later page computation would fail.
case "$E" in
''|*[!0-9]*) fail "Cannot determine end of memory" ;;
esac
[ "$E" -gt 0 ] || fail "Cannot determine end of memory"

echo "soft offlining pages upto $E" 
68f08c3bdfSopenharmony_ci
# Give back every page listed in ./offlined by writing its page frame
# number to the hwpoison "unpoison-pfn" file below $DEBUG.
# Globals: DEBUG (read); utotal and usuccess (incremented).
# Files:   reads ./offlined, one 0x-prefixed byte address per line;
#          appends "<address> <status>" to ./unpoison-failed for every
#          address that could not be unpoisoned.
# Returns at once when ./offlined does not exist.
unpoison() { 
	local addr pfn rc

	if [ ! -f offlined ] ; then
		return
	fi
	
	echo unpoisioning
	while read -r addr ; do 
		(( utotal++ ))
		# The main loop records page addresses as multiples of 4096
		# (0x1000), so dropping the trailing "000" yields the page
		# frame number.
		pfn=${addr%000}
		if echo "$pfn" > "$DEBUG/hwpoison/unpoison-pfn" ; then
			(( usuccess++ ))
		else
			# Save the status immediately: $? is overwritten by every
			# following command (and was always 0 after "if ! ...").
			rc=$?
			echo "$addr $rc" >> unpoison-failed
			echo "unpoisioning $addr failed: $rc"
		fi
	done < offlined
	echo done
	echo
}
89f08c3bdfSopenharmony_ci
# Release the pages still listed in "offlined" whenever the script exits.
trap unpoison EXIT

# Seed bash's random generator.  Without -s the current time is used;
# the seed is printed so that a run can be repeated with -s.
if [ -z "$SEED" ] ; then
	SEED=$(date +%s)
fi
RANDOM=$SEED
echo "Using random seed $SEED"

start=$(date +%s)
failed=0
ufailed=0
success=0
usuccess=0
total=0
utotal=0

cbefore=$(grep HardwareCorrupted /proc/meminfo)

k=0
rm -f offlined unpoison-failed
while true ; do
	# Pick the page in this shell, not inside $( ): a command
	# substitution runs in a subshell, so a $RANDOM read there never
	# advances the generator seeded above and the -s seed had no effect.
	# Three 15-bit draws are combined because a single $RANDOM
	# (0..32767) can only ever reach the first 32768 pages.
	R=$(( ((RANDOM << 30) | (RANDOM << 15) | RANDOM) % E ))
	# Byte address of the page, 0x-prefixed upper-case hex.
	printf -v T '0x%X' $(( R * 4096 ))

	(( total++ ))
	# stderr is silenced on purpose: failed offline attempts are
	# expected and only counted.
	if echo "$T" 2>/dev/null >/sys/devices/system/memory/soft_offline_page ; then
		echo "$T" >> offlined
		(( success++ ))
	else
		(( failed++ ))
	fi

	# Every THRESH attempts give the offlined pages back.
	(( k++ ))
	if [ "$k" -gt "$THRESH" ] ; then
		unpoison
		k=0
		# -f: the batch may not have offlined a single page.
		rm -f offlined
	fi

	if [ -n "$RUNTIME" ] ; then
		(( DIFF = $(date +%s) - start ))
		if [ "$DIFF" -gt "$RUNTIME" ] ; then
			echo time over
			# Give back the last, partial batch now so that it shows
			# up in the statistics below and no page stays offlined
			# after a normal run; then drop the EXIT trap so it is
			# not attempted a second time.
			unpoison
			rm -f offlined
			trap - EXIT
			break
		fi
	fi
done

# One line was appended to unpoison-failed per failed unpoison.
if [ -f unpoison-failed ] ; then
	ufailed=$(wc -l < unpoison-failed)
fi
echo "soft-poison: success $success failed $failed of total $total"
echo "unpoison-failed: success $usuccess failed $ufailed of total $utotal"
echo "poisoned before: $cbefore"
echo -n "poisoned after: "
grep HardwareCorrupted /proc/meminfo
154f08c3bdfSopenharmony_ci
### TODO: define automatic success/failure criteria for this test.
156f08c3bdfSopenharmony_ci
157