#!/bin/bash
# cache error trigger. This shell script is executed by mcelog in daemon mode
# when a CPU reports excessive corrected cache errors. This could be an
# indication of future uncorrected errors.
#
# environment:
# MESSAGE        Human readable error message
# CPU            Linux CPU number that triggered the error
# LEVEL          Cache level affected by error
# TYPE           Cache type affected by error (Data,Instruction,Generic)
# AFFECTED_CPUS  List of CPUs sharing the affected cache
# SOCKETID       Socket ID of affected CPU
#
# note: will run as mcelog configured user
# this can be changed in mcelog.conf

#
# offline the CPUs sharing the affected cache
#

#######################################
# Take a single CPU offline through its sysfs "online" control file.
# Arguments: $1 - Linux CPU number (decimal digits only)
# Outputs:   progress/failure messages through logger (syslog and stderr)
# Returns:   0 if the control file reads back "0" afterwards, 1 otherwise
#######################################
offline_cpu() {
  local cpu=$1
  local online_file
  local state=

  # Accept plain decimal numbers only. Formatting the value with printf %d
  # would turn garbage into 0 (and thereby target cpu0) and would read a
  # leading zero as octal, so validate first and build the path directly.
  if [[ ! $cpu =~ ^[0-9]+$ ]]; then
    logger -s -p daemon.warn -t mcelog "Ignoring invalid CPU number '$cpu' in AFFECTED_CPUS"
    return 1
  fi

  # Drop redundant leading zeros ("010" means CPU 10) without arithmetic.
  while [[ $cpu == 0?* ]]; do
    cpu=${cpu#0}
  done
  online_file="/sys/devices/system/cpu/cpu${cpu}/online"

  logger -s -p daemon.crit -t mcelog "Offlining CPU $cpu due to cache error threshold"

  # The write itself may fail (file missing, no permission, request refused);
  # success is decided solely by the read-back below.
  echo 0 > "$online_file"

  if [[ -r $online_file ]]; then
    state=$(< "$online_file")
  fi
  if [[ $state != "0" ]]; then
    logger -s -p daemon.warn -t mcelog "Offlining CPU $cpu failed"
    return 1
  fi
  return 0
}

# Overall result of this trigger: becomes 1 as soon as any CPU in the list
# could not be taken offline.
EXIT=0

# AFFECTED_CPUS is deliberately left unquoted: it is a whitespace-separated
# list and must be split into one word per CPU.
for i in $AFFECTED_CPUS ; do
  offline_cpu "$i" || EXIT=1
done
# Optional site-specific hook: when an executable cache-error-trigger.local
# exists in the current working directory it is sourced into this shell, so
# it runs with this script's variables in scope.
test -x ./cache-error-trigger.local && source ./cache-error-trigger.local

# Finish with the status held in EXIT.
exit ${EXIT}