#!/bin/bash
# random_offline - randomly soft offline pages, unpoisoning them in batches.
#
# Usage: random_offline options
#   -t seconds    runtime in seconds (default unlimited)
#   -m max-pages  maximum pages to tie up before unpoisoning
#   -s seed       random seed (default: current time in seconds)
#
# Note: running this for too long may still run out of memory
# because unpoison cannot completely undo what soft offline
# does to larger free memory areas (TBD in the kernel)
#
# Side effects: creates the bookkeeping files "offlined" and
# "unpoison-failed" in the current directory, and mounts debugfs on
# $DEBUG when the hwpoison directory is not already present there.
#
# Author: Andi Kleen

THRESH=1000   # offline attempts between two unpoison passes (-m)
SEED=""       # seed for bash's RANDOM (-s); empty means "use the clock"
RUNTIME=""    # run time limit in seconds (-t); empty means unlimited
DEBUG=/sys/kernel/debug
SOFT_OFFLINE=/sys/devices/system/memory/soft_offline_page
PAGE_SIZE=4096

# Print an error message and terminate the script.
# NOTE(review): this exits with status 0 even though it reports an error,
# presumably so a harness treats an unsupported setup as a skip - confirm
# before changing it, callers may depend on it.
fail() {
  echo "ERROR: $*"
  exit 0
}

# Print the option summary, then fail, naming the offending option in $1.
usage() {
  echo "Usage:"
  echo "random_offline options"
  # printf rather than echo: bash's builtin echo does not honour "--" and
  # would print it literally in front of every option line.
  printf '%s\n' \
    "-t seconds runtime in seconds (default unlimited)" \
    "-m max-pages maximum pages to tie up before unpoisoning" \
    "-s seed random seed"
  fail "Invalid option $1"
}

# Succeed when $1 is a non-empty string of decimal digits.
is_uint() {
  [[ $1 =~ ^[0-9]+$ ]]
}

# The leading ':' selects silent getopts mode, so the offending option
# character is available in OPTARG and can be reported by usage().
while getopts ":t:m:s:" option; do
  case "$option" in
    t)
      is_uint "$OPTARG" || usage "-t $OPTARG"
      RUNTIME=$((10#$OPTARG))   # 10# keeps "010" decimal instead of octal
      ;;
    m)
      is_uint "$OPTARG" || usage "-m $OPTARG"
      THRESH=$((10#$OPTARG))
      ;;
    s)
      SEED=$OPTARG
      ;;
    :)
      usage "-$OPTARG (missing argument)"
      ;;
    *)
      usage "-$OPTARG"
      ;;
  esac
done

[[ $EUID -eq 0 ]] || fail "Not root"
[[ -d "$DEBUG/hwpoison" ]] || mount -t debugfs none "$DEBUG"
[[ -d "$DEBUG/hwpoison" ]] || fail "No debugfs"
[[ -w "$SOFT_OFFLINE" ]] || fail "No soft offlining support in kernel"
[[ -w "$DEBUG/hwpoison/unpoison-pfn" ]] || fail "no unpoison support in kernel"

# Print the highest page frame number (end address / PAGE_SIZE) found among
# the "System RAM" entries of /sys/firmware/memmap, or 0 when there is none.
end_of_memory() {
  local entry end pfn max=0

  for entry in /sys/firmware/memmap/*; do
    # Also skips the literal pattern left behind by an unmatched glob.
    [[ -r "$entry/type" && -r "$entry/end" ]] || continue
    [[ "$(< "$entry/type")" == "System RAM" ]] || continue

    end=$(< "$entry/end")          # hexadecimal with a 0x prefix
    pfn=$(( end / PAGE_SIZE ))
    if (( pfn > max )); then
      max=$pfn
    fi
  done

  echo "$max"
}

E=$(end_of_memory)
(( E > 0 )) || fail "Cannot determine end of memory from /sys/firmware/memmap"

echo "soft offlining pages upto $E"

# Unpoison every address recorded in ./offlined (one 0xHEX address per line).
# Updates the global counters utotal, usuccess and ufailed, and appends each
# failure to ./unpoison-failed as "<address> <exit status>".
# Returns immediately when ./offlined does not exist.
unpoison() {
  local addr rc

  [[ -f offlined ]] || return

  echo unpoisioning
  while IFS= read -r addr; do
    (( utotal++ ))
    # Addresses are page aligned hex, so dropping the trailing "000"
    # turns the address into its page frame number.
    if echo "${addr%000}" > "$DEBUG/hwpoison/unpoison-pfn"; then
      (( usuccess++ ))
    else
      rc=$?   # capture first: every later command overwrites $?
      (( ufailed++ ))
      echo "$addr $rc" >> unpoison-failed
      echo "unpoisioning $addr failed: $rc"
    fi
  done < offlined
  echo done
  echo
}

# Give pages back even when the script is interrupted.
trap unpoison EXIT

if [[ -z "$SEED" ]]; then
  SEED=$(date +%s)
fi
RANDOM=$SEED
echo "Using random seed $SEED"

failed=0
ufailed=0
success=0
usuccess=0
total=0
utotal=0

cbefore=$(grep HardwareCorrupted /proc/meminfo)

k=0
rm -f offlined unpoison-failed
start=$SECONDS
while true; do
  # RANDOM must be read in this shell: bash reseeds it inside command
  # substitutions, which would silently discard the -s seed.
  # One draw is only 15 bits (32768 pages = 128 MiB), so three draws are
  # combined to be able to reach every page frame below E.
  (( page = ((RANDOM << 30) | (RANDOM << 15) | RANDOM) % E ))
  printf -v T '0x%X' "$(( page * PAGE_SIZE ))"

  (( total++ ))
  # Many pages cannot be soft offlined; that is expected, so the write
  # error is silenced and only counted.
  if echo 2>/dev/null "$T" > "$SOFT_OFFLINE"; then
    echo "$T" >> offlined
    (( success++ ))
  else
    (( failed++ ))
  fi

  (( k++ ))
  if (( k > THRESH )); then
    unpoison
    k=0
    # -f: the file does not exist when no page of this batch was offlined.
    rm -f offlined
  fi

  if [[ -n "$RUNTIME" ]] && (( SECONDS - start > RUNTIME )); then
    echo time over
    break
  fi
done

# Release the last batch before reporting, so the summary counters include
# it and no page is left offlined; the exit trap is then no longer needed.
unpoison
rm -f offlined
trap - EXIT

echo "soft-poison: success $success failed $failed of total $total"
echo "unpoison-failed: success $usuccess failed $ufailed of total $utotal"
echo "poisoned before: $cbefore"
printf '%s' "poisoned after: "
grep HardwareCorrupted /proc/meminfo

### xxx automatic success/failure criteria?