Subversion Repositories configs

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
34 - 1
#!/bin/bash
2
#
3
# Bring up/down the kernel RDMA stack
4
#
5
# chkconfig: - 05 95
6
# description: Loads/Unloads InfiniBand and iWARP kernel modules
7
# config:	/etc/rdma/rdma.conf
8
#
9
### BEGIN INIT INFO
10
# Provides:       rdma
11
# Default-Stop: 0 1 2 3 4 5 6
12
# Required-Start:
13
# Required-Stop:
14
# Short-Description: Loads and unloads the InfiniBand and iWARP kernel modules
15
# Description: Loads and unloads the InfiniBand and iWARP kernel modules
16
### END INIT INFO
17
 
18
# Optional site configuration file; sourced further down when it exists.
CONFIG=/etc/rdma/rdma.conf

# Init-script helper library.  echo_success, echo_failure and
# __sed_discard_ignored_files are used by this script but not defined in it,
# so they presumably come from here.
. /etc/rc.d/init.d/functions

# Default module groups handed to load_modules by start().
LOAD_CORE_MODULES="ib_addr ib_core ib_mad ib_sa"
LOAD_CORE_CM_MODULES="iw_cm ib_cm rdma_cm"
LOAD_CORE_USER_MODULES="ib_umad ib_uverbs ib_ucm rdma_ucm"
LOAD_ULP_MODULES=""

if [[ ! -f $CONFIG ]]; then
    # Without a config file only IPoIB is loaded as an upper layer protocol.
    LOAD_ULP_MODULES="ib_ipoib"
else
    . "$CONFIG"

    # Asking for RDS also switches IPoIB on.
    case "${RDS_LOAD}" in
        yes) IPOIB_LOAD=yes ;;
    esac

    # IPoIB replaces whatever the list held so far; every later protocol is
    # appended, so the resulting order is ib_ipoib, rds, ib_srp, ib_iser.
    case "${IPOIB_LOAD}" in
        yes) LOAD_ULP_MODULES="ib_ipoib" ;;
    esac

    case "${RDS_LOAD}" in
        yes) LOAD_ULP_MODULES="$LOAD_ULP_MODULES rds" ;;
    esac

    case "${SRP_LOAD}" in
        yes) LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_srp" ;;
    esac

    case "${ISER_LOAD}" in
        yes) LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_iser" ;;
    esac
fi
52
 
53
# Module groups removed by stop() and reported by status().  The three core
# lists are the matching LOAD_CORE_* lists in reverse order.
UNLOAD_ULP_MODULES="ib_iser ib_srp rds_rdma ib_ipoib"
# usnic_verbs is listed because load_hardware_modules() loads it when enic is
# present; leaving it out means stop() never removes it and status() never
# reports it.
UNLOAD_HW_MODULES="iw_c2 iw_cxgb4 iw_cxgb3 iw_nes ib_ehca ib_ipath ib_mthca mlx4_ib ib_qib mlx5_ib mlx5_core ocrdma usnic_verbs"
UNLOAD_CORE_USER_MODULES="rdma_ucm ib_ucm ib_uverbs ib_umad"
UNLOAD_CORE_CM_MODULES="rdma_cm ib_cm iw_cm"
UNLOAD_CORE_MODULES="ib_sa ib_mad ib_core ib_addr"
58
 
59
# We support renaming ib? interfaces to named interfaces, so scan for links
# of type infiniband and pull out whatever names are present.
#
# Prints one interface name per line.  ip(8) shows a child interface as
# "name@parent"; only "name" is usable with "ip link ... dev" and in
# ifcfg-<name> file names, so the "@parent" suffix is dropped.
list_ib_interfaces()
{
    LANG=C ip -o link |
        awk -F ': ' -v IGNORECASE=1 \
            '/link\/infiniband/ { sub(/@.*$/, "", $2); print $2 }'
}

interfaces=$(list_ib_interfaces)
62
 
63
# Print the interface name of every ifcfg file whose TYPE line matches.
#
# Arguments:
#   $1 - text expected right after "TYPE=" (part of a case-insensitive
#        extended regular expression), e.g. "InfiniBand"
#   $2 - optional directory holding the ifcfg-* files; defaults to
#        /etc/sysconfig/network-scripts
# Outputs:
#   One name per line: the matching file's name with everything up to and
#   including "ifcfg-" removed, so names that contain '-' stay intact.
get_configs_by_type ()
{
    local cfg_dir=${2:-/etc/sysconfig/network-scripts}

    # __sed_discard_ignored_files is not defined in this script (presumably
    # provided by the sourced init functions); an empty script passes every
    # line through unchanged.
    LANG=C grep -E -i -l "^[[:space:]]*TYPE=${1}" "$cfg_dir"/ifcfg-* \
        | LC_ALL=C sed -e "${__sed_discard_ignored_files-}" \
        | LC_ALL=C sed -e 's|^.*/ifcfg-||'
}
70
 
71
# Report whether a kernel module shows up in the lsmod listing.
#   $1 - module name, used as a whole-word grep pattern
# Returns grep's exit status: 0 when the name was found, non-zero otherwise.
is_module()
{
    local wanted=$1

    /sbin/lsmod | grep -w "$wanted" > /dev/null 2>&1
}
77
 
78
# Load each listed kernel module that is not already loaded.
#
# Arguments:
#   $@ - module names
# Outputs:
#   A blank line followed by "Failed to load module <name>" (no trailing
#   newline) for every module that modprobe could not load.
# Returns:
#   The sum of the exit statuses of the failing modprobe calls; 0 when every
#   module is loaded.
load_modules()
{
    local -i RC=0
    local -i res=0
    local module

    for module in "$@"; do
        if ! is_module "$module"; then
            /sbin/modprobe "$module"
            res=$?
            RC=$(( RC + res ))
            if (( res != 0 )); then
                echo
                echo -n "Failed to load module $module"
            fi
        fi
    done
    return $RC
}
95
 
96
# Remove one kernel module if it is currently loaded.
#   $1 - module name
# Returns 0 when the module is not loaded or rmmod succeeded; otherwise
# prints a blank line and "Failed to unload <name>" and returns 1.
unload_module()
{
    local target=$1

    # Not loaded: nothing to remove.
    is_module "$target" || return 0

    if ! /sbin/rmmod "$target" > /dev/null 2>&1; then
        echo
        echo "Failed to unload $target"
        return 1
    fi
    return 0
}
110
 
111
# Work around BIOSes whose MTRR setup makes write-combining impossible.
#
# On machines with 4GB or more of RAM, certain BIOSes map the entire 4GB
# address space as write-back cacheable and then exclude the reserved PCI
# I/O addresses from that mapping by adding an overlapping uncacheable
# mapping.  Once that is done, no part of the PCI I/O address space can be
# set write-combining, which is an absolute death-knell to certain IB
# hardware.  So the mapping is unrolled here: instead of punching a hole in a
# single 4GB mapping, the base 4GB mapping is redone as a series of discrete
# mappings that together equal the 4GB mapping minus the hole, and the
# uncacheable mappings that punched the hole are deleted.  That leaves the
# PCI I/O address space unregistered (which defaults it to uncacheable) but
# available for write-combining mappings where needed.
#
# Returns the status of the first step that fails (missing files, or the awk
# script reporting that nothing changed); otherwise the status of the
# ib_ipath check/removal.
check_mtrr_registers()
{
    # Both the MTRR interface and the fixup script have to exist.
    [ -f /proc/mtrr ] && [ -f /etc/rdma/fixup-mtrr.awk ] || return

    # The awk script returns true only when it actually changed the mtrr
    # registers.
    awk -f /etc/rdma/fixup-mtrr.awk /proc/mtrr 2>/dev/null || return

    # The registers changed, so an already loaded ib_ipath has to go.  The
    # udev trigger in load_hardware_modules will immediately reload it, so
    # there shouldn't be a problem.
    if is_module ib_ipath; then
        /sbin/rmmod ib_ipath
    fi
}
137
 
138
# Load the low level RDMA hardware drivers.
#
# Triggers udev for driverless PCI devices, then loads the RDMA companion
# module for every base driver that is loaded without it.
#
# Globals:
#   FIXUP_MTRR_REGS - when "yes", check_mtrr_registers runs first
#                     (presumably set by the config file)
# Returns:
#   The sum of the load_modules exit statuses; 0 when nothing failed.
load_hardware_modules()
{
    local -i RC=0
    local pair base_drv rdma_drv

    [ "$FIXUP_MTRR_REGS" = "yes" ] && check_mtrr_registers
    # WARNING!!  If you are using this script to take down and bring up
    # your IB interfaces on a machine that uses more than one low level
    # Infiniband hardware driver, then there is no guarantee that the
    # ordering of rdma interfaces after you take down and bring up the
    # stack will be the same as the ordering of the interfaces on a
    # clean boot.
    #
    # We match both class NETWORK and class INFINIBAND devices since our
    # iWARP hardware is listed under class NETWORK.  The side effect of
    # this is that we might cause a non-iWARP network driver to be loaded.
    udevadm trigger --subsystem-match=pci --attr-nomatch=driver --attr-match=class=0x020000
    udevadm trigger --subsystem-match=pci --attr-nomatch=driver --attr-match=class=0x0c0600
    udevadm settle

    # ib_ehca is loaded when the device tree has an entry whose name
    # contains "lhca".
    if [ -r /proc/device-tree ] &&
        compgen -G '/proc/device-tree/*lhca*' > /dev/null; then
        if ! is_module ib_ehca; then
            load_modules ib_ehca
            RC+=$?
        fi
    fi

    # base:rdma pairs.  The two checks must be separate commands joined with
    # "&&": written as "is_module A -a ! is_module B" everything after A is
    # merely an ignored argument of the first call.
    for pair in cxgb3:iw_cxgb3 cxgb4:iw_cxgb4 mlx4_core:mlx4_ib \
                mlx5_core:mlx5_ib be2net:ocrdma enic:usnic_verbs; do
        base_drv=${pair%%:*}
        rdma_drv=${pair#*:}
        if is_module "$base_drv" && ! is_module "$rdma_drv"; then
            load_modules "$rdma_drv"
            RC+=$?
        fi
    done
    return $RC
}
190
 
191
# Check AMD chipset issue Errata #58.
#
# Acts only when lspci and setpci are executable and PCI devices 1022:1100,
# 1022:7450 and 15b3:5a46 are all present.  Register 69 of the 1022:1100
# devices is then read, and for the first value that is not "c0" the
# register is written with c0; a message is printed when that write fails.
errata_58()
{
    local pci_id reg_val

    [ -x /sbin/lspci ] || return 0
    [ -x /sbin/setpci ] || return 0

    # Every one of these devices has to be present.
    for pci_id in 1022:1100 1022:7450 15b3:5a46; do
        /sbin/lspci -nd "$pci_id" | grep "${pci_id#*:}" > /dev/null || return 0
    done

    for reg_val in $(/sbin/setpci -d 1022:1100 69); do
        [ "${reg_val}" != "c0" ] || continue
        if /sbin/setpci -d 1022:1100 69=c0; then
            break
        fi
        echo "Failed to apply AMD-8131 Errata #58 workaround"
    done
}
213
 
214
# True when $1, a revision value as printed by setpci (bare hexadecimal, no
# prefix), is older than revision 0x13.  Anything that is not a hexadecimal
# number counts as "not affected".
_errata_56_rev_affected()
{
    [[ $1 =~ ^[0-9a-fA-F]+$ ]] && (( 16#$1 < 16#13 ))
}

# Check AMD chipset issue Errata #56.
#
# Acts only when lspci and setpci are executable and PCI devices 1022:1100,
# 1022:7450 and 15b3:5a46 are all present.  For each 1022:7450 device whose
# revision (register 8) is older than 0x13 and which has a 15b3:5a46 device
# on the bus read from its register 19, register 72 of the 15b3:5a44 devices
# is written with 14.  When that write fails a message is printed and, if
# more than one device sits on that bus, a configuration warning follows.
errata_56()
{
    local bus dev rev device num

    # Check AMD chipset issue Errata #56
    if test -x /sbin/lspci && test -x /sbin/setpci; then
        if ( /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null ) &&
           ( /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null ) &&
           ( /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null ); then
            bus=""
            # Look for devices AMD-8131
            for dev in $(/sbin/setpci -v -f -d 1022:7450 19 | cut -d':' -f1,2)
            do
                bus=$(/sbin/setpci -s "$dev" 19)
                rev=$(/sbin/setpci -s "$dev" 8)
                # Look for a Tavor attached to the secondary bus of this device
                for device in $(/sbin/setpci -f -s "${bus}:" -d 15b3:5a46 19)
                do
                    # setpci reports hexadecimal, so the revision has to be
                    # compared as a hexadecimal number; a decimal test fails
                    # outright on values such as "0b".
                    if _errata_56_rev_affected "$rev"; then
                        /sbin/setpci -d 15b3:5a44 72=14
                        if [ $? -eq 0 ]; then
                            break
                        else
                            echo
                            echo "Failed to apply AMD-8131 Errata #56 workaround"
                        fi
                    else
                        continue
                    fi
                    # If more than one device is on the bus then issue a
                    # warning
                    num=$(/sbin/setpci -f -s "${bus}:" 0 | wc -l |  sed 's/\ *//g')
                    if [ "$num" -gt 1 ]; then
                        echo "Warning: your current PCI-X configuration might be incorrect."
                        echo "see AMD-8131 Errata 56 for more details."
                    fi
                done
            done
        fi
    fi
}
253
 
254
# Bring up the kernel RDMA stack.
#
# Loads the hardware, core, connection-management, user-access and upper
# layer protocol modules in that order, appends "<short hostname> <hca>" to
# each writable node_desc under /sys/class/infiniband, runs the two AMD-8131
# errata checks and creates the subsystem lock file.
#
# Globals:
#   LOAD_CORE_MODULES, LOAD_CORE_CM_MODULES, LOAD_CORE_USER_MODULES,
#   LOAD_ULP_MODULES (read); RDS_LOAD (read); IBSYSDIR (written)
# Returns:
#   The sum of the module loading exit statuses, capped at 255; 0 on success.
start()
{
    # Integer attribute: without it "RC+=$?" appends digits to a string
    # instead of adding.
    local -i RC=0
    local hca_dir

    echo -n "Loading OpenIB kernel modules:"

    load_hardware_modules
    RC+=$?
    load_modules $LOAD_CORE_MODULES
    RC+=$?
    load_modules $LOAD_CORE_CM_MODULES
    RC+=$?
    load_modules $LOAD_CORE_USER_MODULES
    RC+=$?
    load_modules $LOAD_ULP_MODULES
    RC+=$?
    if is_module rds && ! is_module rds_rdma && test "${RDS_LOAD}" = "yes"; then
        load_modules rds_rdma
        RC+=$?
    fi

    # Add node description to sysfs.  Full paths are used instead of changing
    # directory, so a failed directory change can never redirect the writes.
    IBSYSDIR="/sys/class/infiniband"
    if [ -d "${IBSYSDIR}" ]; then
        for hca_dir in "${IBSYSDIR}"/*; do
            if [ -w "${hca_dir}/node_desc" ]; then
                echo -n "$(hostname -s) ${hca_dir##*/}" >> "${hca_dir}/node_desc" 2> /dev/null
            fi
        done
    fi

    errata_58
    errata_56

    touch /var/lock/subsys/rdma

    # Exit statuses are 8 bits wide; keep a large total from wrapping to 0.
    if (( RC > 255 )); then
        RC=255
    fi

    if [ $RC -eq 0 ]; then
        echo_success
    else
        echo_failure
    fi
    echo
    return $RC
}
297
 
298
# Take down the kernel RDMA stack.
#
# Refuses to run (returns 1) while ibacm, opensm, osmtest or srp_daemon show
# up in the process list or while the svcrdma module is loaded.  Otherwise
# brings down the IPoIB interfaces, unloads the ULP, user-access and
# connection-management modules, then the hardware and core modules, and
# removes the subsystem lock file.
#
# Globals:
#   interfaces, UNLOAD_ULP_MODULES, UNLOAD_CORE_USER_MODULES,
#   UNLOAD_CORE_CM_MODULES, UNLOAD_HW_MODULES, UNLOAD_CORE_MODULES (read);
#   SLAVE, MASTER and whatever else the sourced ifcfg files set (written)
# Returns:
#   0 on success, 1 when refused, otherwise the number of modules that could
#   not be unloaded.
stop()
{
    # Check if applications which use infiniband are running
    local apps="ibacm opensm osmtest srp_daemon"
    local app i config mod
    local -i RC=0

    echo -n "Unloading OpenIB kernel modules:"

    for app in $apps
    do
        if ps -ef | grep -- "$app" | grep -v grep > /dev/null 2>&1; then
            echo
            echo "Found $app running."
            echo "Please stop all RDMA applications before downing the stack."
            echo_failure
            echo
            return 1
        fi
    done

    if is_module svcrdma; then
        echo "NFSoRDMA support is still enabled."
        echo "Please stop the nfs-rdma service before stopping the rdma service."
        echo_failure
        echo
        return 1
    fi

    if ! is_module ib_core; then
        # Nothing to do, make sure lock file is gone and return
        rm -f /var/lock/subsys/rdma
        echo_success
        echo
        return 0
    fi

    # Down all IPoIB interfaces
    if is_module ib_ipoib; then
        for i in $interfaces
        do
            config=/etc/sysconfig/network-scripts/ifcfg-$i
            if [ -e "$config" ]; then
                # Clear the variables themselves (not the variables named by
                # their values) so that settings from the previous ifcfg file
                # cannot leak into this one.
                unset SLAVE MASTER
                . "$config"
                [ "${SLAVE}" = yes ] && [ -n "${MASTER}" ] &&
                    ifdown "${MASTER}" >/dev/null 2>&1
                ifdown "$i" >/dev/null 2>&1
            else
                ip link set "$i" down
            fi
        done
    fi
    # Small sleep to let the ifdown settle before we remove any modules
    sleep 1

    # Unload OpenIB modules
    for mod in $UNLOAD_ULP_MODULES $UNLOAD_CORE_USER_MODULES $UNLOAD_CORE_CM_MODULES
    do
        unload_module "$mod"
        RC=$(( RC + $? ))
    done
    # Insert a sleep here for all the ULP modules to have been fully removed
    # before proceeding to unload the driver modules
    sleep 1
    for mod in $UNLOAD_HW_MODULES $UNLOAD_CORE_MODULES
    do
        unload_module "$mod"
        RC=$(( RC + $? ))
    done

    rm -f /var/lock/subsys/rdma
    if [ $RC -eq 0 ]; then
        echo_success
    else
        echo_failure
    fi
    echo
    return $RC
}
377
 
378
# Print one group of the status report.
#
# Arguments:
#   $1    - heading line
#   $2... - module names to look for
# Outputs:
#   The heading, a tab, the names of the listed modules that are loaded (each
#   followed by a space) or "none found", then an empty line.
# Returns:
#   The number of listed modules that are loaded.
_status_report_group()
{
    local heading=$1
    local -i found=0
    local module
    shift

    printf '%s\n\t' "$heading"
    for module in "$@"; do
        if is_module "$module"; then
            echo -n "$module "
            found+=1
        fi
    done
    [ $found -eq 0 ] && echo -n "none found"
    echo
    echo
    return $found
}

# Report which RDMA modules are loaded and, when ib_ipoib is loaded, which
# IPoIB interfaces are configured and which are up.
#
# Globals:
#   UNLOAD_HW_MODULES, UNLOAD_ULP_MODULES, UNLOAD_CORE_USER_MODULES,
#   UNLOAD_CORE_CM_MODULES, UNLOAD_CORE_MODULES, interfaces (read)
# Returns:
#   0 when at least one of the modules is loaded; otherwise 2 when
#   /var/lock/subsys/rdma exists and 3 when it does not.
status()
{
    local -i cnt=0
    local -i modules=0
    local module i configs

    _status_report_group "Low level hardware support loaded:" $UNLOAD_HW_MODULES
    modules+=$?
    _status_report_group "Upper layer protocol modules:" $UNLOAD_ULP_MODULES
    modules+=$?
    _status_report_group "User space access modules:" $UNLOAD_CORE_USER_MODULES
    modules+=$?
    _status_report_group "Connection management modules:" $UNLOAD_CORE_CM_MODULES
    modules+=$?

    # Core modules count towards the total but are not listed.
    for module in $UNLOAD_CORE_MODULES; do
        if is_module "$module"; then
            modules+=1
        fi
    done

    if is_module ib_ipoib; then
        echo -n "Configured IPoIB interfaces: "
        configs=$(get_configs_by_type "InfiniBand")
        if [ -n "$configs" ]; then
            # Unquoted on purpose: joins the one-name-per-line list with
            # single spaces.
            echo $configs
        else
            echo -n "none"
        fi
        echo
        echo -n "Currently active IPoIB interfaces: "
        cnt=0
        for i in $interfaces
        do
            if ip link show "$i" | grep -w UP > /dev/null 2>&1; then
                echo -n "$i "
                cnt+=1
            fi
        done
        [ $cnt -eq 0 ] && echo -n "none"
        echo
    fi

    if [ $modules -eq 0 ]; then
        if [ -f /var/lock/subsys/rdma ]; then
            return 2
        else
            return 3
        fi
    else
        return 0
    fi
}
467
 
468
# Stop the stack and start it again.  The status of stop is not checked:
# start runs regardless and supplies the return value.
restart ()
{
    stop
    start
    return $?
}
473
 
474
# Restart the stack only when the subsystem lock file exists.
# Returns restart's status when a restart was attempted (so a failed restart
# is reported rather than masked), 0 when there was nothing to do.
condrestart ()
{
    if [ -e /var/lock/subsys/rdma ]; then
        restart
    else
        return 0
    fi
}
478
 
479
# Print the list of accepted commands, framed by empty lines.
# Always returns 2.
usage ()
{
    echo
    # ${0##*/} is the script's file name; unlike an unquoted `basename $0`
    # it survives spaces in the path and needs no extra process.
    echo "Usage: ${0##*/} {start|stop|restart|condrestart|try-restart|force-reload|status}"
    echo
    return 2
}
486
 
487
# Commands that change the state of the stack are refused with exit status 4
# unless the caller's user id is 0.
case "$1" in
    start | stop | restart | condrestart | try-restart | force-reload)
        if [ "$(id -u)" != "0" ]; then
            exit 4
        fi
        ;;
esac

# Run the requested command; RC becomes the script's exit status.
case "$1" in
    start)
        start
        RC=$?
        ;;
    stop)
        stop
        RC=$?
        ;;
    restart)
        restart
        RC=$?
        ;;
    reload)
        # There is no reload action; status 3 is reported instead.
        RC=3
        ;;
    condrestart | try-restart | force-reload)
        condrestart
        RC=$?
        ;;
    status)
        status
        RC=$?
        ;;
    *)
        usage
        RC=$?
        ;;
esac

exit $RC