Subversion Repositories configs

Rev

Rev 34 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
#!/bin/bash
#
# Bring up/down the kernel RDMA stack
#
# chkconfig: 12345 05 95
# description: Loads/Unloads InfiniBand and iWARP kernel modules
# config:	/etc/rdma/rdma.conf
#
### BEGIN INIT INFO
# Provides:       rdma
# Default-Start: 1 2 3 4 5
# Default-Stop: 0 6
# Required-Start:
# Required-Stop:
# Short-Description: Loads and unloads the InfiniBand and iWARP kernel modules
# Description: Loads and unloads the InfiniBand and iWARP kernel modules
### END INIT INFO
18
 
19
# Optional configuration file; when present it is sourced below and its
# *_LOAD settings select which upper layer protocol (ULP) modules to load.
CONFIG=/etc/rdma/rdma.conf

# Distribution init helper library.  echo_success / echo_failure used by
# start() and stop() are not defined in this file and presumably come
# from here.
. /etc/rc.d/init.d/functions

# Module lists used by start(), in the order they are loaded.
LOAD_ULP_MODULES=""
LOAD_CORE_USER_MODULES="ib_umad ib_uverbs ib_ucm rdma_ucm"
LOAD_CORE_CM_MODULES="iw_cm ib_cm rdma_cm"
LOAD_CORE_MODULES="ib_addr ib_core ib_mad ib_sa"

if [ -f "$CONFIG" ]; then
    . "$CONFIG"

    # Enabling RDS also enables IPoIB.
    if [ "${RDS_LOAD}" == "yes" ]; then
        IPOIB_LOAD=yes
    fi

    if [ "${IPOIB_LOAD}" == "yes" ]; then
        LOAD_ULP_MODULES="ib_ipoib"
    fi

    if [ "${RDS_LOAD}" == "yes" ]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES rds"
    fi

    if [ "${SRP_LOAD}" == "yes" ]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_srp"
    fi

    if [ "${ISER_LOAD}" == "yes" ]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_iser"
    fi
else
    # Without a configuration file only IPoIB is loaded.
    LOAD_ULP_MODULES="ib_ipoib"
fi

# Module lists used by stop() and status(), in the order they are unloaded.
UNLOAD_ULP_MODULES="ib_iser ib_srp rds_rdma ib_ipoib"
UNLOAD_HW_MODULES="iw_c2 iw_cxgb4 iw_cxgb3 iw_nes ib_ehca ib_ipath ib_mthca mlx4_ib ib_qib mlx5_ib mlx5_core ocrdma"
UNLOAD_CORE_USER_MODULES="rdma_ucm ib_ucm ib_uverbs ib_umad"
UNLOAD_CORE_CM_MODULES="rdma_cm ib_cm iw_cm"
UNLOAD_CORE_MODULES="ib_sa ib_mad ib_core ib_addr"

# We support renaming ib? interfaces to named interfaces, so do scan for link
# type infiniband and pull out whatever names are present
interfaces=$(LANG=C ip -o link | awk -F ': ' -vIGNORECASE=1 '/link\/infiniband/ { print $2 }')
63
 
64
# Print, one per line, the interface-name part of every
# /etc/sysconfig/network-scripts/ifcfg-* file that contains a
# (case-insensitive) "TYPE=$1" line.
#
# Arguments: $1 - value to match after TYPE=
# Globals:   __sed_discard_ignored_files (read) - sed script used to filter
#            the file list; not defined in this file, presumably provided
#            by the sourced init functions library.
# Outputs:   interface names on stdout
get_configs_by_type ()
{
    # "-f 3-" keeps everything after "network-scripts/ifcfg-", so
    # interface names that themselves contain a '-' are not truncated.
    LANG=C grep -E -i -l "^[[:space:]]*TYPE=${1}" \
        /etc/sysconfig/network-scripts/ifcfg-* \
        | LC_ALL=C sed -e "$__sed_discard_ignored_files" \
        | cut -f 3- -d '-'
}
71
 
72
# Report whether kernel module $1 is currently loaded.
#
# Arguments: $1 - module name
# Returns:   0 if the first column of some lsmod line equals $1, else 1
is_module()
{
    [ -n "$1" ] || return 1
    # Compare against the module-name column only, so a name cannot match
    # by accident in the size, use-count or "Used by" columns.
    /sbin/lsmod | awk -v mod="$1" '$1 == mod { found = 1 } END { exit !found }'
}
78
 
79
# Load each named kernel module that exists and is not already loaded.
#
# Arguments: module names (whitespace separated)
# Outputs:   a "Failed to load module <name>" message for every modprobe
#            failure
# Returns:   the sum of the modprobe exit codes (0 when nothing failed)
load_modules()
{
    local -i RC=0
    local module res

    # $* is left unquoted on purpose: callers pass space-separated lists.
    for module in $*; do
        if ! /sbin/modinfo "$module" > /dev/null 2>&1; then
            # do not attempt to load modules which do not exist
            continue
        fi
        if ! is_module "$module"; then
            /sbin/modprobe "$module"
            res=$?
            RC+=$res
            if [ "$res" -ne 0 ]; then
                echo
                echo -n "Failed to load module $module"
            fi
        fi
    done
    return $RC
}
100
 
101
# Unload kernel module $1 if it is currently loaded.
#
# Arguments: $1 - module name
# Outputs:   "Failed to unload <name>" when rmmod fails
# Returns:   0 if the module was not loaded or was removed, 1 if rmmod failed
unload_module()
{
    local mod=$1

    # Nothing to do when the module is not loaded.
    is_module "$mod" || return 0

    if ! /sbin/rmmod "$mod" > /dev/null 2>&1; then
        echo
        echo "Failed to unload $mod"
        return 1
    fi
    return 0
}
115
 
116
# This function is a horrible hack to work around BIOS authors that should
# be shot.  Specifically, certain BIOSes will map the entire 4GB address
# space as write-back cacheable when the machine has 4GB or more of RAM, and
# then they will exclude the reserved PCI I/O addresses from that 4GB
# cacheable mapping by making an overlapping uncacheable mapping.  However,
# once you do that, it is then impossible to set *any* of the PCI I/O
# address space as write-combining.  This is an absolute death-knell to
# certain IB hardware.  So, we unroll this mapping here.  Instead of
# punching a hole in a single 4GB mapping, we redo the base 4GB mapping as
# a series of discrete mappings that effectively are the same as the 4GB
# mapping minus the hole, and then we delete the uncacheable mappings that
# are used to punch the hole.  This then leaves the PCI I/O address space
# unregistered (which defaults it to uncacheable), but available for
# write-combining mappings where needed.
#
# Returns: non-zero when /proc/mtrr or /etc/rdma/fixup-mtrr.awk is missing
#          or the awk script exits non-zero; otherwise the status of the
#          ib_ipath unload step (0 when ib_ipath is not loaded).
check_mtrr_registers()
{
    # If we actually change the mtrr registers, then the awk script will
    # return true, and we need to unload the ib_ipath module if it's already
    # loaded.  The udevtrigger in load_hardware_modules will immediately
    # reload the ib_ipath module for us, so there shouldn't be a problem.
    [ -f /proc/mtrr ] && [ -f /etc/rdma/fixup-mtrr.awk ] &&
        awk -f /etc/rdma/fixup-mtrr.awk /proc/mtrr 2>/dev/null &&
        if is_module ib_ipath; then
            /sbin/rmmod ib_ipath
        fi
}
142
 
143
# Trigger udev so low level RDMA hardware drivers get loaded, then load the
# RDMA companion module for each base driver that is present.
#
# Globals: FIXUP_MTRR_REGS (read) - "yes" runs check_mtrr_registers first
# Returns: the sum of the load_modules exit codes (0 when nothing failed)
load_hardware_modules()
{
    local -i RC=0
    local pair base_drv rdma_drv

    [ "$FIXUP_MTRR_REGS" = "yes" ] && check_mtrr_registers
    # WARNING!!  If you are using this script to take down and bring up
    # your IB interfaces on a machine that uses more than one low level
    # Infiniband hardware driver, then there is no guarantee that the
    # ordering of rdma interfaces after you take down and bring up the
    # stack will be the same as the ordering of the interfaces on a
    # clean boot.
    #
    # We match both class NETWORK and class INFINIBAND devices since our
    # iWARP hardware is listed under class NETWORK.  The side effect of
    # this is that we might cause a non-iWARP network driver to be loaded.
    udevadm trigger --subsystem-match=pci --attr-nomatch=driver --attr-match=class=0x020000
    udevadm trigger --subsystem-match=pci --attr-nomatch=driver --attr-match=class=0x0c0600
    udevadm settle

    # Load ib_ehca when the device tree lists an "lhca" entry.
    if [ -r /proc/device-tree ]; then
        if [ -n "$(ls /proc/device-tree | grep lhca)" ]; then
            if ! is_module ib_ehca; then
                load_modules ib_ehca
                RC+=$?
            fi
        fi
    fi

    # base-driver:rdma-driver pairs.  The RDMA driver is loaded only when
    # the base driver is loaded AND the RDMA driver is not loaded yet.
    # Both conditions are real is_module calls joined with &&; passing
    # "-a ! is_module ..." as extra arguments would silently skip the
    # second check.
    for pair in cxgb3:iw_cxgb3 cxgb4:iw_cxgb4 mlx4_core:mlx4_ib \
                mlx5_core:mlx5_ib be2net:ocrdma enic:usnic_verbs; do
        base_drv=${pair%%:*}
        rdma_drv=${pair#*:}
        if is_module "$base_drv" && ! is_module "$rdma_drv"; then
            load_modules "$rdma_drv"
            RC+=$?
        fi
    done
    return $RC
}
195
 
196
# Apply the AMD-8131 Errata #58 workaround.
#
# When lspci reports all of the PCI IDs 1022:1100, 1022:7450 and 15b3:5a46,
# read config register 0x69 of the 1022:1100 devices and, if any value is
# not "c0", write c0 to that register.
#
# Outputs: "Failed to apply AMD-8131 Errata #58 workaround" when the write
#          fails
errata_58()
{
    local curval val

    # Check AMD chipset issue Errata #58
    if test -x /sbin/lspci && test -x /sbin/setpci; then
        if /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null &&
           /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null &&
           /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null; then
            curval=$(/sbin/setpci -d 1022:1100 69)
            # setpci may print one value per matching device.
            for val in $curval; do
                if [ "${val}" != "c0" ]; then
                    # The write addresses every 1022:1100 device at once,
                    # so stop after the first successful write.
                    if /sbin/setpci -d 1022:1100 69=c0; then
                        break
                    else
                        echo "Failed to apply AMD-8131 Errata #58 workaround"
                    fi
                fi
            done
        fi
    fi
}
218
 
219
# Apply the AMD-8131 Errata #56 workaround.
#
# When lspci reports all of the PCI IDs 1022:1100, 1022:7450 and 15b3:5a46,
# walk every 1022:7450 device, read its registers 0x19 (used below as the
# bus to search) and 0x8 (compared below as the revision), and for a
# 15b3:5a46 device found on that bus write 72=14 to the 15b3:5a44 devices
# when the revision is below 13.
#
# Outputs: a failure message when the write fails and, in that case, a
#          warning when more than one device answers on the bus
errata_56()
{
    local bus rev dev device num

    # Check AMD chipset issue Errata #56
    if test -x /sbin/lspci && test -x /sbin/setpci; then
        if /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null &&
           /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null &&
           /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null; then
            bus=""
            # Look for devices AMD-8131
            for dev in $(/sbin/setpci -v -f -d 1022:7450 19 | cut -d':' -f1,2); do
                bus=$(/sbin/setpci -s $dev 19)
                rev=$(/sbin/setpci -s $dev 8)
                # Look for Tavor attach to secondary bus of this devices
                for device in $(/sbin/setpci -f -s $bus: -d 15b3:5a46 19); do
                    # NOTE(review): rev is compared as a decimal integer;
                    # confirm this is intended for setpci's output format.
                    if [ "$rev" -lt 13 ]; then
                        if /sbin/setpci -d 15b3:5a44 72=14; then
                            break
                        else
                            echo
                            echo "Failed to apply AMD-8131 Errata #56 workaround"
                        fi
                    else
                        continue
                    fi
                    # Only reached when the write above failed.
                    # If more than one device is on the bus the issue a
                    # warning
                    num=$(/sbin/setpci -f -s $bus: 0 | wc -l | sed 's/\ *//g')
                    if [ "$num" -gt 1 ]; then
                        echo "Warning: your current PCI-X configuration might be incorrect."
                        echo "see AMD-8131 Errata 56 for more details."
                    fi
                done
            done
        fi
    fi
}
258
 
259
# Bring up the RDMA stack: load hardware, core, connection-manager,
# user-space and ULP modules, write a node description for each HCA, apply
# the AMD-8131 errata workarounds and create the subsys lock file.
#
# Globals: LOAD_CORE_MODULES, LOAD_CORE_CM_MODULES, LOAD_CORE_USER_MODULES,
#          LOAD_ULP_MODULES, RDS_LOAD (read)
# Returns: the sum of all module-loading exit codes (0 on full success)
start()
{
    # -i is required: without it "RC+=$?" appends digits to a string
    # instead of adding, which corrupts the return code.
    local -i RC=0
    local ibsysdir="/sys/class/infiniband"
    local hca

    echo -n "Loading OpenIB kernel modules:"

    load_hardware_modules
    RC+=$?
    load_modules $LOAD_CORE_MODULES
    RC+=$?
    load_modules $LOAD_CORE_CM_MODULES
    RC+=$?
    load_modules $LOAD_CORE_USER_MODULES
    RC+=$?
    load_modules $LOAD_ULP_MODULES
    RC+=$?
    if is_module rds && ! is_module rds_rdma && test "${RDS_LOAD}" = "yes"; then
        load_modules rds_rdma
        RC+=$?
    fi

    # Add node description to sysfs
    if [ -d "$ibsysdir" ]; then
        for hca in "$ibsysdir"/*; do
            if [ -w "${hca}/node_desc" ]; then
                echo -n "$(hostname -s) ${hca##*/}" >> "${hca}/node_desc" 2> /dev/null
            fi
        done
    fi

    errata_58
    errata_56

    # The lock file is created even when some modules failed to load.
    touch /var/lock/subsys/rdma
    if [ $RC -eq 0 ]; then
        echo_success
    else
        echo_failure
    fi
    echo
    return $RC
}
302
 
303
# Take down the RDMA stack: refuse while known RDMA applications or
# svcrdma are active, bring down the IPoIB interfaces, then unload ULP,
# user-space, connection-manager, hardware and core modules and remove the
# subsys lock file.
#
# Globals: interfaces, UNLOAD_ULP_MODULES, UNLOAD_CORE_USER_MODULES,
#          UNLOAD_CORE_CM_MODULES, UNLOAD_HW_MODULES, UNLOAD_CORE_MODULES
#          (read)
# Returns: 1 if an RDMA application or svcrdma blocks the stop; otherwise
#          the number of modules that failed to unload (0 on success)
stop()
{
    # Check if applications which use infiniband are running
    local apps="ibacm opensm osmtest srp_daemon"
    local app iface config mod
    local -i RC=0

    echo -n "Unloading OpenIB kernel modules:"

    for app in $apps; do
        if ps -ef | grep "$app" | grep -v grep > /dev/null 2>&1; then
            echo
            echo "Found $app running."
            echo "Please stop all RDMA applications before downing the stack."
            echo_failure
            echo
            return 1
        fi
    done

    if is_module svcrdma; then
        echo "NFSoRDMA support is still enabled."
        echo "Please stop the nfs-rdma service before stopping the rdma service."
        echo_failure
        echo
        return 1
    fi

    if ! is_module ib_core; then
        # Nothing to do, make sure lock file is gone and return
        rm -f /var/lock/subsys/rdma
        echo_success
        echo
        return 0
    fi

    # Down all IPoIB interfaces
    if is_module ib_ipoib; then
        for iface in $interfaces; do
            config=/etc/sysconfig/network-scripts/ifcfg-$iface
            if [ -e "$config" ]; then
                # Clear the variables themselves (not the names held in
                # their values) so settings from a previously sourced
                # ifcfg file cannot leak into this iteration.
                unset SLAVE MASTER
                . "$config"
                if [ "${SLAVE}" = yes ] && [ -n "${MASTER}" ]; then
                    ifdown "${MASTER}" >/dev/null 2>&1
                fi
                ifdown "$iface" >/dev/null 2>&1
            else
                ip link set "$iface" down
            fi
        done
    fi
    # Small sleep to let the ifdown settle before we remove any modules
    sleep 1

    # Unload OpenIB modules
    for mod in $UNLOAD_ULP_MODULES $UNLOAD_CORE_USER_MODULES $UNLOAD_CORE_CM_MODULES; do
        unload_module "$mod"
        RC+=$?
    done
    # Insert a sleep here for all the ULP modules to have been fully removed
    # before proceeding to unload the driver modules
    sleep 1
    for mod in $UNLOAD_HW_MODULES $UNLOAD_CORE_MODULES; do
        unload_module "$mod"
        RC+=$?
    done

    rm -f /var/lock/subsys/rdma
    if [ $RC -eq 0 ]; then
        echo_success
    else
        echo_failure
    fi
    echo
    return $RC
}
382
 
383
# Helper for status(): print a title line, then a tab-indented list of the
# named modules that are loaded ("none found" when there are none),
# followed by a blank line.
#
# Arguments: $1 - title line; remaining arguments - module names
# Returns:   the number of listed modules that are loaded
_status_print_group()
{
    local title=$1
    local mod
    local -i found=0

    shift
    echo -ne "${title}\n\t"
    for mod in "$@"; do
        if is_module "$mod"; then
            echo -n "$mod "
            found+=1
        fi
    done
    [ $found -eq 0 ] && echo -n "none found"
    echo
    echo
    return $found
}

# Report which RDMA modules are loaded and, when ib_ipoib is loaded, which
# IPoIB interfaces are configured and active.
#
# Globals: UNLOAD_HW_MODULES, UNLOAD_ULP_MODULES, UNLOAD_CORE_USER_MODULES,
#          UNLOAD_CORE_CM_MODULES, UNLOAD_CORE_MODULES, interfaces (read)
# Returns: 0 if at least one listed module is loaded; otherwise 2 when
#          /var/lock/subsys/rdma exists and 3 when it does not
status()
{
    local -i cnt=0
    local -i modules=0
    local module=""
    local iface configs

    _status_print_group "Low level hardware support loaded:" $UNLOAD_HW_MODULES
    modules+=$?
    _status_print_group "Upper layer protocol modules:" $UNLOAD_ULP_MODULES
    modules+=$?
    _status_print_group "User space access modules:" $UNLOAD_CORE_USER_MODULES
    modules+=$?
    _status_print_group "Connection management modules:" $UNLOAD_CORE_CM_MODULES
    modules+=$?

    # Core modules are counted but not listed.
    for module in $UNLOAD_CORE_MODULES; do
        if is_module "$module"; then
            modules+=1
        fi
    done

    if is_module ib_ipoib; then
        echo -n "Configured IPoIB interfaces: "
        configs=$(get_configs_by_type "InfiniBand")
        if [ -n "$configs" ]; then
            # Unquoted on purpose: joins the names onto a single line.
            echo $configs
        else
            echo -n "none"
        fi
        echo
        echo -n "Currently active IPoIB interfaces: "
        cnt=0
        for iface in $interfaces; do
            if ip link show "$iface" | grep -w UP > /dev/null 2>&1; then
                echo -n "$iface "
                cnt+=1
            fi
        done
        [ $cnt -eq 0 ] && echo -n "none"
        echo
    fi

    if [ $modules -eq 0 ]; then
        if [ -f /var/lock/subsys/rdma ]; then
            return 2
        else
            return 3
        fi
    else
        return 0
    fi
}
472
 
473
# Stop and then start the RDMA stack.  start is run even when stop reports
# a failure.
#
# Returns: the exit status of start
restart ()
{
    stop
    start
}
478
 
479
# Restart the RDMA stack only if the subsys lock file exists.
#
# Returns: 0 when the lock file /var/lock/subsys/rdma does not exist;
#          otherwise the exit status of restart, so a failed restart is
#          reported instead of being masked as success
condrestart ()
{
    if [ -e /var/lock/subsys/rdma ]; then
        restart
        return $?
    fi
    return 0
}
483
 
484
# Print the list of supported actions.
#
# Outputs: usage text on stdout, surrounded by blank lines
# Returns: 2
usage ()
{
    echo
    # ${0##*/} strips the directory part without forking basename and is
    # safe for paths containing spaces.
    echo "Usage: ${0##*/} {start|stop|restart|condrestart|try-restart|force-reload|status}"
    echo
    return 2
}
491
 
492
# The actions listed here exit with status 4 when not run as root.
case "$1" in
    start|stop|restart|condrestart|try-restart|force-reload)
        [ "$(id -u)" != "0" ] && exit 4
        ;;
esac

# Run the requested action and exit with its status.  "reload" is answered
# with status 3 without doing anything; any other word prints the usage.
case "$1" in
    start)        start;       RC=$? ;;
    stop)         stop;        RC=$? ;;
    restart)      restart;     RC=$? ;;
    reload)       RC=3 ;;
    condrestart)  condrestart; RC=$? ;;
    try-restart)  condrestart; RC=$? ;;
    force-reload) condrestart; RC=$? ;;
    status)       status;      RC=$? ;;
    *)            usage;       RC=$? ;;
esac

exit $RC