4 |
- |
1 |
#
|
|
|
2 |
# Example config file for mcelog
|
|
|
3 |
# mcelog is the user space backend that decodes and processes machine check events
|
|
|
4 |
# (cpu hardware errors) reported by the CPU to the kernel
|
|
|
5 |
#
|
|
|
6 |
|
|
|
7 |
# general format
|
|
|
8 |
#optionname = value
|
|
|
9 |
# white space is not allowed in value currently, except at the end where it is dropped
|
|
|
10 |
#
|
|
|
11 |
|
58 |
- |
12 |
# In general all command line options that are not commands work here.
|
|
|
13 |
# See man mcelog or mcelog --help for a list.
|
4 |
- |
14 |
# e.g. to enable the --no-syslog option use
|
|
|
15 |
#no-syslog = yes (or no to disable)
|
|
|
16 |
# when the option has an argument
|
|
|
17 |
#logfile = /tmp/logfile
|
|
|
18 |
|
|
|
19 |
# by default, disable extended error logging on newer Intel processors
|
|
|
20 |
no-imc-log = yes
|
|
|
21 |
|
58 |
- |
22 |
# below are the options which are not command line options.
|
4 |
- |
23 |
|
|
|
24 |
# Set CPU type for which mcelog decodes events:
|
|
|
25 |
#cpu = type
|
58 |
- |
26 |
# For valid values for type please see mcelog --help.
|
4 |
- |
27 |
# If this value is set incorrectly the decoded output will be likely incorrect.
|
58 |
- |
28 |
# By default when this parameter is not set mcelog uses the CPU it is running on
|
4 |
- |
29 |
# on very new kernels the mcelog events reported by the kernel also carry
|
|
|
30 |
# the CPU type which is used too when available and not overridden.
|
|
|
31 |
|
|
|
32 |
# Enable daemon mode:
|
|
|
33 |
#daemon = yes
|
|
|
34 |
# By default mcelog just processes the currently pending events and exits.
|
58 |
- |
35 |
# In daemon mode it will keep running as a daemon in the background and poll
|
4 |
- |
36 |
# the kernel for events and then decode them.
|
|
|
37 |
|
58 |
- |
38 |
# Filter out known broken events by default.
|
4 |
- |
39 |
filter = yes
|
58 |
- |
40 |
# Don't log memory errors individually.
|
|
|
41 |
# They still get accounted if that is enabled.
|
4 |
- |
42 |
#filter-memory-errors = yes
|
|
|
43 |
|
|
|
44 |
# output in undecoded raw format to be easier machine readable
|
58 |
- |
45 |
# (default is decoded).
|
4 |
- |
46 |
#raw = yes
|
|
|
47 |
|
|
|
48 |
# Set CPU Mhz to decode uptime from time stamp counter (output
|
|
|
49 |
# unreliable, not needed on new kernels which report the event time
|
|
|
50 |
# directly. A lot of systems don't have a linear time stamp clock
|
|
|
51 |
# and the output is wrong then.
|
|
|
52 |
# Normally mcelog tries to figure out if the TSC is reliable
|
|
|
53 |
# and only uses the current frequency then.
|
|
|
54 |
# Setting a frequency forces timestamp decoding.
|
|
|
55 |
# This setting is obsolete with modern kernels which report the time
|
|
|
56 |
# directly.
|
|
|
57 |
#cpumhz = 1800.00
|
|
|
58 |
|
|
|
59 |
# log output options
|
|
|
60 |
# Log decoded machine checks in syslog (default stdout or syslog for daemon)
|
|
|
61 |
#syslog = yes
|
|
|
62 |
# Log decoded machine checks in syslog with error level
|
|
|
63 |
#syslog-error = yes
|
|
|
64 |
# Never log anything to syslog
|
|
|
65 |
#no-syslog = yes
|
|
|
66 |
# Append log output to logfile instead of stdout. Only when no syslog logging is active
|
|
|
67 |
#logfile = filename
|
|
|
68 |
|
58 |
- |
69 |
# Use SMBIOS information to decode DIMMs (needs root).
|
|
|
70 |
# This function is not recommended to use right now and generally not needed.
|
4 |
- |
71 |
# The exception is memdb prepopulation, which is configured separately below.
|
|
|
72 |
#dmi = no
|
|
|
73 |
|
58 |
- |
74 |
# When in daemon mode run as this user after set up.
|
|
|
75 |
# Note that the triggers will run as this user too.
|
|
|
76 |
# Setting this to non root will mean that triggers cannot take some corrective
|
|
|
77 |
# action, like offlining objects.
|
4 |
- |
78 |
#run-credentials-user = root
|
58 |
- |
79 |
|
4 |
- |
80 |
# group to run as daemon with
|
|
|
81 |
# default to the group of the run-credentials-user
|
|
|
82 |
#run-credentials-group = nobody
|
|
|
83 |
|
|
|
84 |
[server]
|
|
|
85 |
# user allowed to access client socket.
|
|
|
86 |
# when set to * match any
|
58 |
- |
87 |
# root is always allowed to access.
|
4 |
- |
88 |
# default: root only
|
|
|
89 |
client-user = root
|
|
|
90 |
# group allowed to access mcelog
|
58 |
- |
91 |
# When no group is configured any group matches (but still user checking).
|
4 |
- |
92 |
# when set to * match any
|
|
|
93 |
#client-group = root
|
58 |
- |
94 |
# Path to the unix socket for client<->server communication.
|
|
|
95 |
# When no socket-path is configured the server will not start
|
4 |
- |
96 |
#socket-path = /var/run/mcelog-client
|
58 |
- |
97 |
# When mcelog starts it checks if a server is already running. This configures the timeout
|
4 |
- |
98 |
# for this check.
|
|
|
99 |
#initial-ping-timeout = 2
|
|
|
100 |
#
|
|
|
101 |
[dimm]
|
|
|
102 |
# Is the in memory DIMM error tracking enabled?
|
|
|
103 |
# Only works on systems with integrated memory controller and
|
58 |
- |
104 |
# which are supported.
|
|
|
105 |
# Only takes effect in daemon mode.
|
4 |
- |
106 |
dimm-tracking-enabled = yes
|
58 |
- |
107 |
# Use DMI information from the BIOS to prepopulate DIMM database.
|
4 |
- |
108 |
# Note this might not work with all BIOS and requires mcelog to run as root.
|
|
|
109 |
# Alternative is to let mcelog create DIMM objects on demand.
|
|
|
110 |
dmi-prepopulate = yes
|
|
|
111 |
#
|
58 |
- |
112 |
# Execute these triggers when the rate of corrected or uncorrected
|
|
|
113 |
# Errors per DIMM exceeds the threshold.
|
4 |
- |
114 |
# Note when the hardware does not report DIMMs this might also
|
58 |
- |
115 |
# be per channel.
|
4 |
- |
116 |
# The default of 10/24h is reasonable for server quality
|
58 |
- |
117 |
# DDR3 DIMMs as of 2009/10.
|
4 |
- |
118 |
#uc-error-trigger = dimm-error-trigger
|
|
|
119 |
uc-error-threshold = 1 / 24h
|
|
|
120 |
#ce-error-trigger = dimm-error-trigger
|
|
|
121 |
ce-error-threshold = 10 / 24h
|
|
|
122 |
|
|
|
123 |
[socket]
|
58 |
- |
124 |
# Enable memory error accounting per socket.
|
4 |
- |
125 |
socket-tracking-enabled = yes
|
58 |
- |
126 |
|
|
|
127 |
# Threshold and trigger for uncorrected memory errors on a socket.
|
4 |
- |
128 |
# mem-uc-error-trigger = socket-memory-error-trigger
|
58 |
- |
129 |
|
|
|
130 |
#mem-uc-error-threshold = 100 / 24h
|
|
|
131 |
|
|
|
132 |
# Trigger script for corrected memory errors on a socket.
|
|
|
133 |
mem-ce-error-trigger = socket-memory-error-trigger
|
|
|
134 |
|
|
|
135 |
# Threshold on when to trigger a corrected error for the socket.
|
|
|
136 |
|
|
|
137 |
mem-ce-error-threshold = 100 / 24h
|
|
|
138 |
|
4 |
- |
139 |
# Log socket error threshold explicitly?
|
|
|
140 |
#mem-ce-error-log = yes
|
|
|
141 |
|
58 |
- |
142 |
# Trigger script for uncorrected bus error events
|
34 |
- |
143 |
bus-uc-threshold-trigger = bus-error-trigger
|
58 |
- |
144 |
|
|
|
145 |
# Trigger script for uncorrected IOMCA errors
|
34 |
- |
146 |
iomca-threshold-trigger = iomca-error-trigger
|
58 |
- |
147 |
|
|
|
148 |
# Trigger script for other uncategorized errors
|
34 |
- |
149 |
unknown-threshold-trigger = unknown-error-trigger
|
4 |
- |
150 |
|
|
|
151 |
[cache]
|
58 |
- |
152 |
# Processing of cache error thresholds reported by Intel CPUs.
|
4 |
- |
153 |
#cache-threshold-trigger = cache-error-trigger
|
58 |
- |
154 |
|
4 |
- |
155 |
# Should cache threshold events be logged explicitly?
|
|
|
156 |
#cache-threshold-log = yes
|
|
|
157 |
|
|
|
158 |
[page]
|
58 |
- |
159 |
# Memory error accounting per 4K memory page.
|
|
|
160 |
# Threshold for the corrected memory errors trigger script.
|
4 |
- |
161 |
memory-ce-threshold = 10 / 24h
|
58 |
- |
162 |
|
|
|
163 |
# Trigger script for corrected errors.
|
4 |
- |
164 |
# memory-ce-trigger = page-error-trigger
|
58 |
- |
165 |
|
4 |
- |
166 |
# Should page threshold events be logged explicitly?
|
|
|
167 |
memory-ce-log = yes
|
58 |
- |
168 |
|
4 |
- |
169 |
# specify the internal action mcelog takes when a page error threshold is exceeded
|
|
|
170 |
# this is done in addition to executing the trigger script if available
|
|
|
171 |
# off no action
|
|
|
172 |
# account only account errors
|
|
|
173 |
# soft try to soft-offline page without killing any processes
|
|
|
174 |
# This requires an up-to-date kernel. Might not be successful.
|
|
|
175 |
# hard try to hard-offline page by killing processes
|
|
|
176 |
# Requires an up-to-date kernel. Might not be successful.
|
|
|
177 |
# soft-then-hard First try to soft offline, then try hard offlining
|
|
|
178 |
#memory-ce-action = off|account|soft|hard|soft-then-hard
|
|
|
179 |
memory-ce-action = soft
|
|
|
180 |
|
|
|
181 |
[trigger]
|
|
|
182 |
# Maximum number of running triggers
|
|
|
183 |
children-max = 2
|
|
|
184 |
# execute triggers in this directory
|
|
|
185 |
directory = /etc/mcelog
|