Rev 34 | Blame | Compare with Previous | Last modification | View Log | RSS feed
#!/bin/bash
#
# Bring up/down the kernel RDMA stack
#
# chkconfig: 12345 05 95
# description: Loads/Unloads InfiniBand and iWARP kernel modules
# config: /etc/rdma/rdma.conf
#
### BEGIN INIT INFO
# Provides: rdma
# Default-Start: 1 2 3 4 5
# Default-Stop: 0 6
# Required-Start:
# Required-Stop:
# Short-Description: Loads and unloads the InfiniBand and iWARP kernel modules
# Description: Loads and unloads the InfiniBand and iWARP kernel modules
### END INIT INFO

CONFIG=/etc/rdma/rdma.conf

# NOTE(review): echo_success, echo_failure and __sed_discard_ignored_files are
# not defined in this file; presumably they come from this library -- confirm.
. /etc/rc.d/init.d/functions

LOAD_ULP_MODULES=""
LOAD_CORE_USER_MODULES="ib_umad ib_uverbs ib_ucm rdma_ucm"
LOAD_CORE_CM_MODULES="iw_cm ib_cm rdma_cm"
LOAD_CORE_MODULES="ib_addr ib_core ib_mad ib_sa"

# Build the list of upper layer protocol (ULP) modules to load.  With a
# config file the *_LOAD switches decide; without one only ib_ipoib is loaded.
if [ -f "$CONFIG" ]; then
    . "$CONFIG"

    # Requesting RDS also turns on IPoIB.
    if [[ "${RDS_LOAD}" == "yes" ]]; then
        IPOIB_LOAD=yes
    fi

    if [[ "${IPOIB_LOAD}" == "yes" ]]; then
        LOAD_ULP_MODULES="ib_ipoib"
    fi
    if [[ "${RDS_LOAD}" == "yes" ]]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES rds"
    fi
    if [[ "${SRP_LOAD}" == "yes" ]]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_srp"
    fi
    if [[ "${ISER_LOAD}" == "yes" ]]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_iser"
    fi
else
    LOAD_ULP_MODULES="ib_ipoib"
fi

UNLOAD_ULP_MODULES="ib_iser ib_srp rds_rdma ib_ipoib"
UNLOAD_HW_MODULES="iw_c2 iw_cxgb4 iw_cxgb3 iw_nes ib_ehca ib_ipath ib_mthca mlx4_ib ib_qib mlx5_ib mlx5_core ocrdma"
UNLOAD_CORE_USER_MODULES="rdma_ucm ib_ucm ib_uverbs ib_umad"
UNLOAD_CORE_CM_MODULES="rdma_cm ib_cm iw_cm"
UNLOAD_CORE_MODULES="ib_sa ib_mad ib_core ib_addr"

# We support renaming ib? interfaces to named interfaces, so do scan for link
# type infiniband and pull out whatever names are present
interfaces=$(LANG=C ip -o link | awk -F ': ' -vIGNORECASE=1 '/link\/infiniband/ { print $2 }')

# Print the third '-'-separated field of every
# /etc/sysconfig/network-scripts/ifcfg-* path whose contents have a TYPE= line
# matching $1 (case-insensitive), after filtering the file list through the
# sed expression held in $__sed_discard_ignored_files.
#   $1 - value to match after "TYPE="
get_configs_by_type ()
{
    LANG=C grep -E -i -l "^[[:space:]]*TYPE=${1}" \
        /etc/sysconfig/network-scripts/ifcfg-* \
        | LC_ALL=C sed -e "$__sed_discard_ignored_files" \
        | cut -f 3 -d '-'
}

# If module $1 is loaded return - 0 else - 1
# (status of a whole-word grep for $1 over the lsmod output)
is_module()
{
    /sbin/lsmod | grep -w -- "$1" > /dev/null 2>&1
}

# modprobe every module named in the arguments that modinfo knows about and
# that is not already loaded.
# Returns the sum of the modprobe exit statuses (0 when everything loaded).
load_modules()
{
    local -i RC=0
    local module
    local -i res

    # $* is intentionally unquoted: callers pass space-separated module lists.
    for module in $*; do
        if ! /sbin/modinfo "$module" > /dev/null 2>&1; then
            # do not attempt to load modules which do not exist
            continue
        fi
        if ! is_module "$module"; then
            /sbin/modprobe "$module"
            res=$?
            RC+=$res
            if (( res != 0 )); then
                echo
                # Report the module that actually failed (the original
                # printed an unrelated, unset variable here).
                echo -n "Failed to load module $module"
            fi
        fi
    done
    return $RC
}

# rmmod module $1 if it is currently loaded.
# Returns 1 (after printing a message) when rmmod fails, 0 otherwise.
unload_module()
{
    local mod=$1

    if is_module "$mod"; then
        if ! /sbin/rmmod "$mod" > /dev/null 2>&1; then
            echo
            echo "Failed to unload $mod"
            return 1
        fi
    fi
    return 0
}

# This function is a hack to work around a BIOS misbehaviour.  Specifically,
# certain BIOSes will map the entire 4GB address space as write-back
# cacheable when the machine has 4GB or more of RAM, and then they will
# exclude the reserved PCI I/O addresses from that 4GB cacheable mapping by
# making an overlapping uncacheable mapping.  However, once you do that, it
# is then impossible to set *any* of the PCI I/O address space as
# write-combining.  This is an absolute death-knell to certain IB hardware.
# So, we unroll this mapping here.  Instead of punching a hole in a single
# 4GB mapping, we redo the base 4GB mapping as a series of discrete mappings
# that effectively are the same as the 4GB mapping minus the hole, and then
# we delete the uncacheable mappings that are used to punch the hole.  This
# then leaves the PCI I/O address space unregistered (which defaults it to
# uncacheable), but available for write-combining mappings where needed.
check_mtrr_registers()
{
    # If we actually change the mtrr registers, then the awk script will
    # return true, and we need to unload the ib_ipath module if it's already
    # loaded.  The udev trigger in load_hardware_modules will immediately
    # reload the ib_ipath module for us, so there shouldn't be a problem.
    [ -f /proc/mtrr ] && [ -f /etc/rdma/fixup-mtrr.awk ] &&
        awk -f /etc/rdma/fixup-mtrr.awk /proc/mtrr 2>/dev/null &&
        if is_module ib_ipath; then
            /sbin/rmmod ib_ipath
        fi
}

# Trigger udev for driverless PCI devices of class 0x020000 and 0x0c0600,
# then load the RDMA companion module for each low level driver that is
# present without it.
# Returns the accumulated load_modules status.
load_hardware_modules()
{
    local -i RC=0

    [ "$FIXUP_MTRR_REGS" = "yes" ] && check_mtrr_registers

    # WARNING!!  If you are using this script to take down and bring up
    # your IB interfaces on a machine that uses more than one low level
    # Infiniband hardware driver, then there is no guarantee that the
    # ordering of rdma interfaces after you take down and bring up the
    # stack will be the same as the ordering of the interfaces on a
    # clean boot.
    #
    # We match both class NETWORK and class INFINIBAND devices since our
    # iWARP hardware is listed under class NETWORK.  The side effect of
    # this is that we might cause a non-iWARP network driver to be loaded.
    udevadm trigger --subsystem-match=pci --attr-nomatch=driver --attr-match=class=0x020000
    udevadm trigger --subsystem-match=pci --attr-nomatch=driver --attr-match=class=0x0c0600
    udevadm settle

    # Load ib_ehca when /proc/device-tree has an entry containing "lhca".
    if [ -r /proc/device-tree ]; then
        if ls /proc/device-tree | grep -q lhca; then
            if ! is_module ib_ehca; then
                load_modules ib_ehca
                RC+=$?
            fi
        fi
    fi

    # The original tests were written "is_module A -a ! is_module B", which
    # hands "-a ! is_module B" to is_module as ignored extra arguments; use a
    # real && list so both conditions are evaluated.
    if is_module cxgb3 && ! is_module iw_cxgb3; then
        load_modules iw_cxgb3
        RC+=$?
    fi
    if is_module cxgb4 && ! is_module iw_cxgb4; then
        load_modules iw_cxgb4
        RC+=$?
    fi
    if is_module mlx4_core && ! is_module mlx4_ib; then
        load_modules mlx4_ib
        RC+=$?
    fi
    if is_module mlx5_core && ! is_module mlx5_ib; then
        load_modules mlx5_ib
        RC+=$?
    fi
    if is_module be2net && ! is_module ocrdma; then
        load_modules ocrdma
        RC+=$?
    fi
    if is_module enic && ! is_module usnic_verbs; then
        load_modules usnic_verbs
        RC+=$?
    fi
    return $RC
}

# AMD-8131 Errata #58 workaround: when PCI devices 1022:1100, 1022:7450 and
# 15b3:5a46 are all present, write c0 to register 69 of 1022:1100 unless it
# already reads c0.
errata_58()
{
    local CURVAL val

    # Check AMD chipset issue Errata #58
    if test -x /sbin/lspci && test -x /sbin/setpci; then
        if ( /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null ) &&
           ( /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null ) &&
           ( /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null ); then
            CURVAL=$(/sbin/setpci -d 1022:1100 69)
            for val in $CURVAL
            do
                if [ "${val}" != "c0" ]; then
                    # One successful write is enough; stop scanning.
                    if /sbin/setpci -d 1022:1100 69=c0; then
                        break
                    else
                        echo "Failed to apply AMD-8131 Errata #58 workaround"
                    fi
                fi
            done
        fi
    fi
}

# AMD-8131 Errata #56 workaround: when PCI devices 1022:1100, 1022:7450 and
# 15b3:5a46 are all present, walk each 1022:7450 device and, for 15b3:5a46
# devices found on the bus read from its register 19, write 14 to register 72
# of 15b3:5a44 when the value read from register 8 is below 13.
errata_56()
{
    local bus rev num dev device

    # Check AMD chipset issue Errata #56
    if test -x /sbin/lspci && test -x /sbin/setpci; then
        if ( /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null ) &&
           ( /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null ) &&
           ( /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null ); then
            bus=""
            # Look for devices AMD-8131
            for dev in $(/sbin/setpci -v -f -d 1022:7450 19 | cut -d':' -f1,2)
            do
                bus=$(/sbin/setpci -s $dev 19)
                rev=$(/sbin/setpci -s $dev 8)
                # Look for Tavor attached to the secondary bus of this device
                for device in $(/sbin/setpci -f -s $bus: -d 15b3:5a46 19)
                do
                    if [ $rev -lt 13 ]; then
                        if /sbin/setpci -d 15b3:5a44 72=14; then
                            break
                        else
                            echo
                            echo "Failed to apply AMD-8131 Errata #56 workaround"
                        fi
                    else
                        continue
                    fi
                    # If more than one device is on the bus then issue a
                    # warning
                    num=$(/sbin/setpci -f -s $bus: 0 | wc -l | sed 's/\ *//g')
                    if [ $num -gt 1 ]; then
                        echo "Warning: your current PCI-X configuration might be incorrect."
                        echo "see AMD-8131 Errata 56 for more details."
                    fi
                done
            done
        fi
    fi
}

# Load hardware, core, CM, user-space and ULP modules, append a node
# description to every writable /sys/class/infiniband/*/node_desc, apply the
# errata workarounds and create the subsys lock file.
# Returns the accumulated module-load status (0 on full success).
start()
{
    # -i is required: without it "RC+=$?" appends digits as text instead of
    # adding them.
    local -i RC=0
    local ibsysdir="/sys/class/infiniband"
    local hca_path hca

    echo -n "Loading OpenIB kernel modules:"

    load_hardware_modules
    RC+=$?
    load_modules $LOAD_CORE_MODULES
    RC+=$?
    load_modules $LOAD_CORE_CM_MODULES
    RC+=$?
    load_modules $LOAD_CORE_USER_MODULES
    RC+=$?
    load_modules $LOAD_ULP_MODULES
    RC+=$?

    if is_module rds && ! is_module rds_rdma && test "${RDS_LOAD}" = "yes"; then
        load_modules rds_rdma
        RC+=$?
    fi

    # Add node description to sysfs
    if [ -d "${ibsysdir}" ]; then
        for hca_path in "${ibsysdir}"/*
        do
            hca=${hca_path##*/}
            if [ -w "${hca_path}/node_desc" ]; then
                echo -n "$(hostname -s) ${hca}" >> "${hca_path}/node_desc" 2> /dev/null
            fi
        done
    fi

    errata_58
    errata_56

    touch /var/lock/subsys/rdma
    if (( RC == 0 )); then
        echo_success
    else
        echo_failure
    fi
    echo
    return $RC
}

# Refuse to run while known RDMA applications or svcrdma are active; otherwise
# bring down the IPoIB interfaces, unload ULP/user/CM modules, then hardware
# and core modules, and remove the subsys lock file.
# Returns 1 on refusal, else the accumulated unload_module status.
stop()
{
    # Check if applications which use infiniband are running
    local apps="ibacm opensm osmtest srp_daemon"
    local -i RC=0
    local app i config mod MODULES

    echo -n "Unloading OpenIB kernel modules:"

    for app in $apps
    do
        if ( ps -ef | grep "$app" | grep -v grep > /dev/null 2>&1 ); then
            echo
            echo "Found $app running."
            echo "Please stop all RDMA applications before downing the stack."
            echo_failure
            echo
            return 1
        fi
    done

    if is_module svcrdma; then
        echo "NFSoRDMA support is still enabled."
        echo "Please stop the nfs-rdma service before stopping the rdma service."
        echo_failure
        echo
        return 1
    fi

    if ! is_module ib_core; then
        # Nothing to do, make sure lock file is gone and return
        rm -f /var/lock/subsys/rdma
        echo_success
        echo
        return 0
    fi

    # Down all IPoIB interfaces
    if is_module ib_ipoib; then
        for i in $interfaces
        do
            config=/etc/sysconfig/network-scripts/ifcfg-$i
            if [ -e "$config" ]; then
                # Clear values left over from the previous ifcfg file.  The
                # original "unset ${SLAVE}" unset the variable *named by*
                # SLAVE's value, so stale settings leaked across iterations.
                unset SLAVE MASTER
                . "$config"
                if [[ "${SLAVE}" == yes && -n "${MASTER}" ]]; then
                    ifdown "${MASTER}" >/dev/null 2>&1
                fi
                ifdown "$i" >/dev/null 2>&1
            else
                ip link set "$i" down
            fi
        done
    fi

    # Small sleep to let the ifdown settle before we remove any modules
    sleep 1

    # Unload OpenIB modules
    MODULES="$UNLOAD_ULP_MODULES $UNLOAD_CORE_USER_MODULES"
    MODULES="$MODULES $UNLOAD_CORE_CM_MODULES"
    for mod in $MODULES
    do
        unload_module "$mod"
        RC+=$?
    done

    # Insert a sleep here for all the ULP modules to have been fully removed
    # before proceeding to unload the driver modules
    sleep 1

    MODULES="$UNLOAD_HW_MODULES $UNLOAD_CORE_MODULES"
    for mod in $MODULES
    do
        unload_module "$mod"
        RC+=$?
    done

    rm -f /var/lock/subsys/rdma
    if (( RC == 0 )); then
        echo_success
    else
        echo_failure
    fi
    echo
    return $RC
}

# Helper for status: print "<title>:", a newline and a tab, then every loaded
# module among the remaining arguments (or "none found"), then a blank line.
#   $1  - section title
#   $2+ - module names to check
# Returns the number of loaded modules found.
report_loaded_modules()
{
    local title=$1
    local -i found=0
    local name

    shift
    printf '%s:\n\t' "$title"
    for name in "$@"; do
        if is_module "$name"; then
            printf '%s ' "$name"
            found+=1
        fi
    done
    if (( found == 0 )); then
        printf 'none found'
    fi
    printf '\n\n'
    return $found
}

# Report which hardware, ULP, user-space and CM modules are loaded and, when
# ib_ipoib is loaded, the configured and currently UP IPoIB interfaces.
# Returns 0 when any module from the UNLOAD_* lists is loaded; otherwise 2 if
# the subsys lock file exists, 3 if it does not.
status()
{
    local -i cnt=0
    local -i modules=0
    local module="" configs ifname

    report_loaded_modules "Low level hardware support loaded" $UNLOAD_HW_MODULES
    modules+=$?
    report_loaded_modules "Upper layer protocol modules" $UNLOAD_ULP_MODULES
    modules+=$?
    report_loaded_modules "User space access modules" $UNLOAD_CORE_USER_MODULES
    modules+=$?
    report_loaded_modules "Connection management modules" $UNLOAD_CORE_CM_MODULES
    modules+=$?

    # Core modules are counted but not listed.
    for module in $UNLOAD_CORE_MODULES; do
        if is_module "$module"; then
            modules+=1
        fi
    done

    if is_module ib_ipoib; then
        echo -n "Configured IPoIB interfaces: "
        configs=$(get_configs_by_type "InfiniBand")
        if [ -n "$configs" ]; then
            # Unquoted on purpose: joins the one-per-line names with spaces.
            echo $configs
        else
            echo -n "none"
        fi
        echo
        echo -n "Currently active IPoIB interfaces: "
        cnt=0
        for ifname in $interfaces
        do
            if ip link show "$ifname" | grep -w UP > /dev/null 2>&1; then
                echo -n "$ifname "
                cnt+=1
            fi
        done
        if (( cnt == 0 )); then
            echo -n "none"
        fi
        echo
    fi

    if (( modules == 0 )); then
        if [ -f /var/lock/subsys/rdma ]; then
            return 2
        else
            return 3
        fi
    else
        return 0
    fi
}

# Stop then start the stack; the return status is that of start.
restart ()
{
    stop
    start
}

# Restart only when the subsys lock file exists.
# Returns restart's status in that case (the original "&& restart || return 0"
# reported success even when the restart failed), 0 when there is no lock file.
condrestart ()
{
    if [ -e /var/lock/subsys/rdma ]; then
        restart
    else
        return 0
    fi
}

# Print the usage line; always returns 2.
usage ()
{
    echo
    echo "Usage: $(basename "$0") {start|stop|restart|condrestart|try-restart|force-reload|status}"
    echo
    return 2
}

# Actions that change system state exit with status 4 unless run as uid 0.
case "$1" in
    start|stop|restart|condrestart|try-restart|force-reload)
        if [ "$(id -u)" != "0" ]; then
            exit 4
        fi
        ;;
esac

case "$1" in
    start) start; RC=$? ;;
    stop) stop; RC=$? ;;
    restart) restart; RC=$? ;;
    reload) RC=3 ;;
    condrestart) condrestart; RC=$? ;;
    try-restart) condrestart; RC=$? ;;
    force-reload) condrestart; RC=$? ;;
    status) status; RC=$? ;;
    *) usage; RC=$? ;;
esac

exit $RC