diff options
Diffstat (limited to 'grid5000/steps/data/setup/puppet/modules/env/files/base')
13 files changed, 2055 insertions, 0 deletions
diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/cpufreq/cpufrequtils b/grid5000/steps/data/setup/puppet/modules/env/files/base/cpufreq/cpufrequtils new file mode 100644 index 0000000..03070fe --- /dev/null +++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/cpufreq/cpufrequtils @@ -0,0 +1 @@ +GOVERNOR="performance" diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/ganglia/gmond.conf b/grid5000/steps/data/setup/puppet/modules/env/files/base/ganglia/gmond.conf new file mode 100644 index 0000000..a2f8292 --- /dev/null +++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/ganglia/gmond.conf @@ -0,0 +1,336 @@ +/* This configuration is as close to 2.5.x default behavior as possible + The values closely match ./gmond/metric.h definitions in 2.5.x */ +globals { + daemonize = yes + setuid = yes + user = ganglia + debug_level = 0 + max_udp_msg_len = 1472 + mute = no + deaf = yes + host_dmax = 0 /*secs */ + cleanup_threshold = 300 /*secs */ + gexec = no + send_metadata_interval = 0 +} + +/* If a cluster attribute is specified, then all gmond hosts are wrapped inside + * of a <CLUSTER> tag. If you do not specify a cluster tag, then all <HOSTS> will + * NOT be wrapped inside of a <CLUSTER> tag. */ +cluster { + name = "my_clustername" + owner = "INRIA" + latlong = "unspecified" + url = "unspecified" +} + +/* The host section describes attributes of the host, like the location */ +host { + location = "unspecified" +} + +/* Feel free to specify as many udp_send_channels as you like. Gmond + used to only support having a single channel */ +udp_send_channel { + mcast_join = 239.2.11.71 + port = 8649 + ttl = 1 +} + +/* You can specify as many udp_recv_channels as you like as well. 
*/ +udp_recv_channel { + mcast_join = 239.2.11.71 + port = 8649 + bind = 239.2.11.71 +} + +/* You can specify as many tcp_accept_channels as you like to share + an xml description of the state of the cluster */ +tcp_accept_channel { + port = 8649 +} + +/* Each metrics module that is referenced by gmond must be specified and + loaded. If the module has been statically linked with gmond, it does not + require a load path. However all dynamically loadable modules must include + a load path. */ +modules { + module { + name = "core_metrics" + } + module { + name = "cpu_module" + path = "/usr/lib/ganglia/modcpu.so" + } + module { + name = "disk_module" + path = "/usr/lib/ganglia/moddisk.so" + } + module { + name = "load_module" + path = "/usr/lib/ganglia/modload.so" + } + module { + name = "mem_module" + path = "/usr/lib/ganglia/modmem.so" + } + module { + name = "net_module" + path = "/usr/lib/ganglia/modnet.so" + } + module { + name = "proc_module" + path = "/usr/lib/ganglia/modproc.so" + } + module { + name = "sys_module" + path = "/usr/lib/ganglia/modsys.so" + } +} + +include ('/etc/ganglia/conf.d/*.conf') + + +/* The old internal 2.5.x metric array has been replaced by the following + collection_group directives. What follows is the default behavior for + collecting and sending metrics that is as close to 2.5.x behavior as + possible. */ + +/* This collection group will cause a heartbeat (or beacon) to be sent every + 20 seconds. In the heartbeat is the GMOND_STARTED data which expresses + the age of the running gmond. */ +collection_group { + collect_once = yes + time_threshold = 20 + metric { + name = "heartbeat" + } +} + +/* This collection group will send general info about this host every 1200 secs. + This information doesn't change between reboots and is only collected once. 
*/ +collection_group { + collect_once = yes + time_threshold = 1200 + metric { + name = "cpu_num" + title = "CPU Count" + } + metric { + name = "cpu_speed" + title = "CPU Speed" + } + metric { + name = "mem_total" + title = "Memory Total" + } + /* Should this be here? Swap can be added/removed between reboots. */ + metric { + name = "swap_total" + title = "Swap Space Total" + } + metric { + name = "boottime" + title = "Last Boot Time" + } + metric { + name = "machine_type" + title = "Machine Type" + } + metric { + name = "os_name" + title = "Operating System" + } + metric { + name = "os_release" + title = "Operating System Release" + } + metric { + name = "location" + title = "Location" + } +} + +/* This collection group will send the status of gexecd for this host every 300 secs */ +/* Unlike 2.5.x the default behavior is to report gexecd OFF. */ +collection_group { + collect_once = yes + time_threshold = 300 + metric { + name = "gexec" + title = "Gexec Status" + } +} + +/* This collection group will collect the CPU status info every 20 secs. + The time threshold is set to 90 seconds. In honesty, this time_threshold could be + set significantly higher to reduce unnecessary network chatter. */ +collection_group { + collect_every = 20 + time_threshold = 90 + /* CPU status */ + metric { + name = "cpu_user" + value_threshold = "1.0" + title = "CPU User" + } + metric { + name = "cpu_system" + value_threshold = "1.0" + title = "CPU System" + } + metric { + name = "cpu_idle" + value_threshold = "5.0" + title = "CPU Idle" + } + metric { + name = "cpu_nice" + value_threshold = "1.0" + title = "CPU Nice" + } + metric { + name = "cpu_aidle" + value_threshold = "5.0" + title = "CPU aidle" + } + metric { + name = "cpu_wio" + value_threshold = "1.0" + title = "CPU wio" + } + /* The next two metrics are optional if you want more detail... + ... since they are accounted for in cpu_system. 
+ metric { + name = "cpu_intr" + value_threshold = "1.0" + title = "CPU intr" + } + metric { + name = "cpu_sintr" + value_threshold = "1.0" + title = "CPU sintr" + } + */ +} + +collection_group { + collect_every = 20 + time_threshold = 90 + /* Load Averages */ + metric { + name = "load_one" + value_threshold = "1.0" + title = "One Minute Load Average" + } + metric { + name = "load_five" + value_threshold = "1.0" + title = "Five Minute Load Average" + } + metric { + name = "load_fifteen" + value_threshold = "1.0" + title = "Fifteen Minute Load Average" + } +} + +/* This group collects the number of running and total processes */ +collection_group { + collect_every = 80 + time_threshold = 950 + metric { + name = "proc_run" + value_threshold = "1.0" + title = "Total Running Processes" + } + metric { + name = "proc_total" + value_threshold = "1.0" + title = "Total Processes" + } +} + +/* This collection group grabs the volatile memory metrics every 40 secs and + sends them at least every 180 secs. This time_threshold can be increased + significantly to reduce unneeded network traffic. 
*/ +collection_group { + collect_every = 40 + time_threshold = 180 + metric { + name = "mem_free" + value_threshold = "1024.0" + title = "Free Memory" + } + metric { + name = "mem_shared" + value_threshold = "1024.0" + title = "Shared Memory" + } + metric { + name = "mem_buffers" + value_threshold = "1024.0" + title = "Memory Buffers" + } + metric { + name = "mem_cached" + value_threshold = "1024.0" + title = "Cached Memory" + } + metric { + name = "swap_free" + value_threshold = "1024.0" + title = "Free Swap Space" + } +} + +collection_group { + collect_every = 40 + time_threshold = 300 + metric { + name = "bytes_out" + value_threshold = 4096 + title = "Bytes Sent" + } + metric { + name = "bytes_in" + value_threshold = 4096 + title = "Bytes Received" + } + metric { + name = "pkts_in" + value_threshold = 256 + title = "Packets Received" + } + metric { + name = "pkts_out" + value_threshold = 256 + title = "Packets Sent" + } +} + +/* Different than 2.5.x default since the old config made no sense */ +collection_group { + collect_every = 1800 + time_threshold = 3600 + metric { + name = "disk_total" + value_threshold = 1.0 + title = "Total Disk Space" + } +} + +collection_group { + collect_every = 40 + time_threshold = 180 + metric { + name = "disk_free" + value_threshold = 1.0 + title = "Disk Space Available" + } + metric { + name = "part_max_used" + value_threshold = 1.0 + title = "Maximum Disk Space Used" + } +} diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/90-ib.rules b/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/90-ib.rules new file mode 100644 index 0000000..994f4a0 --- /dev/null +++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/90-ib.rules @@ -0,0 +1,6 @@ +KERNEL=="umad*", NAME="infiniband/%k" +KERNEL=="issm*", NAME="infiniband/%k" +KERNEL=="ucm*", NAME="infiniband/%k", MODE="0666" +KERNEL=="uverbs*", NAME="infiniband/%k", MODE="0666" +KERNEL=="ucma", NAME="infiniband/%k", 
MODE="0666" +KERNEL=="rdma_cm", NAME="infiniband/%k", MODE="0666" diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/openib.conf b/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/openib.conf new file mode 100644 index 0000000..87981c7 --- /dev/null +++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/openib.conf @@ -0,0 +1,39 @@ +# Start HCA driver upon boot +ONBOOT=yes + +# Load UCM module +UCM_LOAD=no + +# Load RDMA_CM module +RDMA_CM_LOAD=yes + +# Load RDMA_UCM module +RDMA_UCM_LOAD=yes + +# Increase ib_mad thread priority +RENICE_IB_MAD=no + +# Load MTHCA +MTHCA_LOAD=yes + +# Load MLX4 modules +MLX4_LOAD=yes + +# Load MLX5 modules +MLX5_LOAD=yes + +# Load MLX4_EN module +MLX4_EN_LOAD=yes + +# Load CXGB3 modules +CXGB3_LOAD=no + +# Load NES modules +NES_LOAD=no + +# Load IPoIB +IPOIB_LOAD=yes + +# Enable IPoIB Connected Mode +SET_IPOIB_CM=yes + diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/openibd b/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/openibd new file mode 100644 index 0000000..b943e72 --- /dev/null +++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/openibd @@ -0,0 +1,1610 @@ +#!/bin/bash +### BEGIN INIT INFO +# Provides: openibd +# Required-Start: $local_fs +# Required-Stop: opensmd +# Default-Start: 2 3 5 +# Default-Stop: 0 1 2 6 +# Description: Activates/Deactivates InfiniBand Driver to # start at boot time. +### END INIT INFO +# +# Copyright (c) 2013 Mellanox Technologies. All rights reserved. +# Copyright (c) 2010 QLogic Corporation. All rights reserved. +# +# This Software is licensed under one of the following licenses: +# +# 1) under the terms of the "Common Public License 1.0" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/cpl.php. 
+# +# 2) under the terms of the "The BSD License" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/bsd-license.php. +# +# 3) under the terms of the "GNU General Public License (GPL) Version 2" a +# copy of which is available from the Open Source Initiative, see +# http://www.opensource.org/licenses/gpl-license.php. +# +# Licensee has the right to choose one of the above licenses. +# +# Redistributions of source code must retain the above copyright +# notice and one of the license notices. +# +# Redistributions in binary form must reproduce both the above copyright +# notice, one of the license notices in the documentation +# and/or other materials provided with the distribution. +# +# +# $Id: openibd 9139 2006-08-29 14:03:38Z vlad $ +# + +# config: /etc/infiniband/openib.conf +OPENIBD_CONFIG=${OPENIBD_CONFIG:-"/etc/infiniband/openib.conf"} +CONFIG=$OPENIBD_CONFIG +export LANG=en_US.UTF-8 + +if [ ! -f $CONFIG ]; then + echo No InfiniBand configuration found + exit 0 +fi + +. $CONFIG + +CWD=`pwd` +cd /etc/infiniband +WD=`pwd` + +PATH=$PATH:/sbin:/usr/bin:/lib/udev +if [ -e /etc/profile.d/ofed.sh ]; then + . /etc/profile.d/ofed.sh +fi + +# Only use ONBOOT option if called by a runlevel directory. +# Therefore determine the base, follow a runlevel link name ... +base=${0##*/} +link=${base#*[SK][0-9][0-9]} +# ... 
and compare them +if [[ $link == $base && "$0" != "/etc/rc.d/init.d/openibd" ]] ; then + RUNMODE=manual + ONBOOT=yes +else + RUNMODE=auto +fi + +# Allow unsupported modules, if disallowed by current configuration +modprobe=/sbin/modprobe +if ${modprobe} -c | grep -q '^allow_unsupported_modules *0'; then + modprobe="${modprobe} --allow-unsupported-modules" +fi + +ACTION=$1 +shift +max_ports_num_in_hca=0 + +# Check if OpenIB configured to start automatically +if [ "X${ONBOOT}" != "Xyes" ]; then + exit 0 +fi + +if ( grep -i 'SuSE Linux' /etc/issue >/dev/null 2>&1 ); then + if [ -n "$INIT_VERSION" ] ; then + # MODE=onboot + if LANG=C egrep -L "^ONBOOT=['\"]?[Nn][Oo]['\"]?" ${CONFIG} > /dev/null ; then + exit 0 + fi + fi +fi + +######################################################################### +# Get a sane screen width +[ -z "${COLUMNS:-}" ] && COLUMNS=80 + +[ -z "${CONSOLETYPE:-}" ] && [ -x /sbin/consoletype ] && CONSOLETYPE="`/sbin/consoletype`" + +# Read in our configuration +if [ -z "${BOOTUP:-}" ]; then + if [ -f /etc/sysconfig/init ]; then + . /etc/sysconfig/init + else + # This all seem confusing? 
Look in /etc/sysconfig/init, + # or in /usr/doc/initscripts-*/sysconfig.txt + BOOTUP=color + RES_COL=60 + MOVE_TO_COL="echo -en \\033[${RES_COL}G" + SETCOLOR_SUCCESS="echo -en \\033[1;32m" + SETCOLOR_FAILURE="echo -en \\033[1;31m" + SETCOLOR_WARNING="echo -en \\033[1;33m" + SETCOLOR_NORMAL="echo -en \\033[0;39m" + LOGLEVEL=1 + fi + if [ "$CONSOLETYPE" = "serial" ]; then + BOOTUP=serial + MOVE_TO_COL= + SETCOLOR_SUCCESS= + SETCOLOR_FAILURE= + SETCOLOR_WARNING= + SETCOLOR_NORMAL= + fi +fi + +if [ "${BOOTUP:-}" != "verbose" ]; then + INITLOG_ARGS="-q" +else + INITLOG_ARGS= +fi + +echo_success() { + echo -n $@ + [ "$BOOTUP" = "color" ] && $MOVE_TO_COL + echo -n "[ " + [ "$BOOTUP" = "color" ] && $SETCOLOR_SUCCESS + echo -n $"OK" + [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL + echo -n " ]" + echo -e "\r" + return 0 +} + +echo_done() { + echo -n $@ + [ "$BOOTUP" = "color" ] && $MOVE_TO_COL + echo -n "[ " + [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL + echo -n $"done" + [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL + echo -n " ]" + echo -e "\r" + return 0 +} + +echo_failure() { + echo -n $@ + [ "$BOOTUP" = "color" ] && $MOVE_TO_COL + echo -n "[" + [ "$BOOTUP" = "color" ] && $SETCOLOR_FAILURE + echo -n $"FAILED" + [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL + echo -n "]" + echo -e "\r" + return 1 +} + +echo_warning() { + echo -n $@ + [ "$BOOTUP" = "color" ] && $MOVE_TO_COL + echo -n "[" + [ "$BOOTUP" = "color" ] && $SETCOLOR_WARNING + echo -n $"WARNING" + [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL + echo -n "]" + echo -e "\r" + return 1 +} + +count_ib_ports() +{ + local cnt=0 + local ports_in_hca=0 + sysdir=/sys/class/infiniband + hcas=$(/bin/ls -1 ${sysdir} 2> /dev/null) + for hca in $hcas + do + ports_in_hca=$(/bin/ls -1 ${sysdir}/${hca}/ports 2> /dev/null | wc -l) + if [ $ports_in_hca -gt $max_ports_num_in_hca ]; then + max_ports_num_in_hca=$ports_in_hca + fi + cnt=$[ $cnt + $ports_in_hca ] + done + + return $cnt +} + +# Setting Environment variables +if [ -f 
/etc/redhat-release ]; then + DISTRIB="RedHat" + NETWORK_CONF_DIR="/etc/sysconfig/network-scripts" +elif [ -f /etc/rocks-release ]; then + DISTRIB="Rocks" + NETWORK_CONF_DIR="/etc/sysconfig/network-scripts" +elif [ -f /etc/SuSE-release ]; then + DISTRIB="SuSE" + NETWORK_CONF_DIR="/etc/sysconfig/network" +elif [ -f /etc/debian_version ]; then + DISTRIB="Debian" + NETWORK_CONF_DIR="/etc/infiniband" +else + DISTRIB=`ls /etc/*-release | head -n 1 | xargs -iXXX basename XXX -release 2> /dev/null` + if [ -d /etc/sysconfig/network-scripts ]; then + NETWORK_CONF_DIR="/etc/sysconfig/network-scripts" + elif [ -d /etc/sysconfig/network ]; then + NETWORK_CONF_DIR="/etc/sysconfig/network" + else + echo_failure "You system is not supported for IPoIB configuration" + echo "Try to load driver manually using configuration files from $WD directory" + exit 1 + fi +fi + +# Define kernel version prefix +KPREFIX=`uname -r | cut -c -3 | tr -d '.' | tr -d '[:space:]'` + +# Setting OpenIB start parameters +POST_LOAD_MODULES="" + +RUN_SYSCTL=${RUN_SYSCTL:-"no"} + +IPOIB=0 +IPOIB_MTU=${IPOIB_MTU:-65520} +if [ "X${IPOIB_LOAD}" == "Xyes" ]; then + IPOIB=1 +fi + +if [ "X${SRP_LOAD}" == "Xyes" ]; then + POST_LOAD_MODULES="$POST_LOAD_MODULES ib_srp" +fi + +if [ "X${SRPT_LOAD}" == "Xyes" ]; then + POST_LOAD_MODULES="$POST_LOAD_MODULES ib_srpt" +fi + +if [ "X${QLGC_VNIC_LOAD}" == "Xyes" ]; then + POST_LOAD_MODULES="$POST_LOAD_MODULES qlgc_vnic" +fi + +if [ "X${SRP_TARGET_LOAD}" == "Xyes" ]; then + POST_LOAD_MODULES="$POST_LOAD_MODULES ib_srp_target" +fi + +if [ "X${RDMA_CM_LOAD}" == "Xyes" ]; then + POST_LOAD_MODULES="$POST_LOAD_MODULES rdma_cm" +fi + +if [ "X${UCM_LOAD}" == "Xyes" ]; then + POST_LOAD_MODULES="$POST_LOAD_MODULES ib_ucm" +fi + +if [ "X${RDS_LOAD}" == "Xyes" ]; then + POST_LOAD_MODULES="$POST_LOAD_MODULES rds rds_rdma rds_tcp" +fi + +if [ "X${RDMA_UCM_LOAD}" == "Xyes" ]; then + POST_LOAD_MODULES="$POST_LOAD_MODULES rdma_ucm" +fi + +GEN1_UNLOAD_MODULES="ib_srp_target scsi_target 
ib_srp kdapltest_module ib_kdapl ib_useraccess ib_useraccess_cm ib_cm ib_dapl_srv ib_ip2pr ib_ipoib ib_tavor mod_thh mod_rhh ib_dm_client ib_sa_client ib_client_query ib_poll ib_mad ib_core ib_services" + +UNLOAD_MODULES="ib_mthca mlx5_ib mlx5_core mlx4_ib ib_ipath ipath_core ib_ehca iw_nes i40iw iw_cxgb3 cxgb3 iw_cxgb4 cxgb4 ocrdma bnxt_re bnxt_en" +UNLOAD_MODULES="$UNLOAD_MODULES ib_qib" +UNLOAD_MODULES="$UNLOAD_MODULES ib_ipoib ib_madeye ib_rds" +UNLOAD_MODULES="$UNLOAD_MODULES rpcrdma rds_rdma rds_tcp rds ib_ucm kdapl ib_srp_target scsi_target ib_srpt ib_srp ib_iser" +UNLOAD_MODULES="$UNLOAD_MODULES rdma_ucm rdma_cm iw_cm ib_cm ib_local_sa findex" +UNLOAD_MODULES="$UNLOAD_MODULES ib_sa ib_uverbs ib_umad ib_mad ib_core ib_addr" + +STATUS_MODULES="rdma_ucm ib_rds rds rds_rdma rds_tcp ib_srpt ib_srp qlgc_vnic rdma_cm ib_addr ib_local_sa findex ib_ipoib ib_ehca ib_ipath ipath_core mlx4_core mlx4_ib mlx4_en mlx5_core mlx5_ib ib_mthca ib_uverbs ib_umad ib_ucm ib_sa ib_cm ib_mad ib_core iw_cxgb3 iw_cxgb4 iw_nes i40iw vmw_pvrdma" +STATUS_MODULES="$STATUS_MODULES ib_qib ocrdma bnxt_re bnxt_en" + +if (modinfo scsi_transport_srp 2>/dev/null | grep depends: | grep -q compat 2>/dev/null) || + (lsmod 2>/dev/null | grep scsi_transport_srp | grep -q compat); then + UNLOAD_MODULES="$UNLOAD_MODULES scsi_transport_srp" + STATUS_MODULES="$STATUS_MODULES scsi_transport_srp" +fi + +ipoib_ha_pidfile=/var/run/ipoib_ha.pid +srp_daemon_pidfile=/var/run/srp_daemon.pid +_truescale=/etc/infiniband/truescale.cmds + +get_interfaces() +{ + interfaces=$(cd /sys/class/net;/bin/ls -d ib* 2> /dev/null) +} + +get_mlx4_en_interfaces() +{ + mlx4_en_interfaces="" + for ethpath in /sys/class/net/* + do + if (grep 0x15b3 ${ethpath}/device/vendor > /dev/null 2>&1); then + mlx4_en_interfaces="$mlx4_en_interfaces ${ethpath##*/}" + fi + done +} + +# If module $1 is loaded return - 0 else - 1 +is_module() +{ +local RC + + /sbin/lsmod | grep -w "$1" > /dev/null 2>&1 + RC=$? 
+ +return $RC +} + +log_msg() +{ + logger -i "openibd: $@" +} + +load_module() +{ + local module=$1 + filename=`modinfo $module | grep filename | awk '{print $NF}'` + + if [ ! -n "$filename" ]; then + echo_failure "Module $module does not exist" + log_msg "Error: Module $module does not exist" + return 1 + fi + + if [ -L $filename ]; then + filename=`readlink -f $filename` + fi + + rpm_name=`/bin/rpm -qf $filename --queryformat "[%{NAME}]" 2> /dev/null` + if [ $? -ne 0 ]; then + echo_warning "Module $module does not belong to OFED" + log_msg "Module $module does not belong to OFED" + else + case "$rpm_name" in + *compat-rdma* | kernel-ib) + ;; + *) + echo_warning "Module $module belong to $rpm_name which is not a part of OFED" + log_msg "Module $module belong to $rpm_name which is not a part of OFED" + ;; + esac + fi + ${modprobe} $module > /dev/null 2>&1 +} + +# Return module's refcnt +is_ref() +{ + local refcnt + refcnt=`cat /sys/module/"$1"/refcnt 2> /dev/nill` + return $refcnt +} + +get_sw_fw_info() +{ + INFO=/etc/infiniband/info + OFEDHOME="/usr/local" + if [ -x ${INFO} ]; then + OFEDHOME=$(${INFO} | grep -w prefix | cut -d '=' -f 2) + fi + MREAD=$(which mstmread 2> /dev/null) + + # Get OFED Build id + if [ -r ${OFEDHOME}/BUILD_ID ]; then + echo "Software" + echo "-------------------------------------" + printf "Build ID:\n" + cat ${OFEDHOME}/BUILD_ID + echo "-------------------------------------" + fi + + # Get FW version + if [ ! 
-x ${MREAD} ]; then + return 1 + fi + + vendor="15b3" + slots=$(lspci -n -d "${vendor}:" 2> /dev/null | grep -v "5a46" | cut -d ' ' -f 1) + for mst_device in $slots + do + major=$($MREAD ${mst_device} 0x82478 2> /dev/null | cut -d ':' -f 2) + subminor__minor=$($MREAD ${mst_device} 0x8247c 2> /dev/null | cut -d ':' -f 2) + ftime=$($MREAD ${mst_device} 0x82480 2> /dev/null | cut -d ':' -f 2) + fdate=$($MREAD ${mst_device} 0x82484 2> /dev/null | cut -d ':' -f 2) + + major=$(echo -n $major | cut -d x -f 2 | cut -b 4) + subminor__minor1=$(echo -n $subminor__minor | cut -d x -f 2 | cut -b 3,4) + subminor__minor2=$(echo -n $subminor__minor | cut -d x -f 2 | cut -b 5,6,7,8) + echo + echo "Device ${mst_device} Info:" + echo "Firmware:" + + printf "\tVersion:" + printf "\t$major.$subminor__minor1.$subminor__minor2\n" + + day=$(echo -n $fdate | cut -d x -f 2 | cut -b 7,8) + month=$(echo -n $fdate | cut -d x -f 2 | cut -b 5,6) + year=$(echo -n $fdate | cut -d x -f 2 | cut -b 1,2,3,4) + hour=$(echo -n $ftime | cut -d x -f 2 | cut -b 5,6) + min=$(echo -n $ftime | cut -d x -f 2 | cut -b 3,4) + sec=$(echo -n $ftime | cut -d x -f 2 | cut -b 1,2) + + printf "\tDate:" + printf "\t$day/$month/$year $hour:$min:$sec\n" + done +} + +# Create debug info +get_debug_info() +{ + trap '' 2 9 15 + DEBUG_INFO=/tmp/ib_debug_info.log + /bin/rm -f $DEBUG_INFO + touch $DEBUG_INFO + echo "Hostname: `hostname -s`" >> $DEBUG_INFO + test -e /etc/issue && echo "OS: `cat /etc/issue`" >> $DEBUG_INFO + echo "Current kernel: `uname -r`" >> $DEBUG_INFO + echo "Architecture: `uname -m`" >> $DEBUG_INFO + which gcc &>/dev/null && echo "GCC version: `gcc --version`" >> $DEBUG_INFO + echo "CPU: `cat /proc/cpuinfo | /bin/grep -E \"model name|arch\" | head -1`" >> $DEBUG_INFO + echo "`cat /proc/meminfo | /bin/grep \"MemTotal\"`" >> $DEBUG_INFO + echo "Chipset: `/sbin/lspci 2> /dev/null | head -1 | cut -d ':' -f 2-`" >> $DEBUG_INFO + + echo >> $DEBUG_INFO + get_sw_fw_info >> $DEBUG_INFO + echo >> $DEBUG_INFO + + 
echo >> $DEBUG_INFO + echo "############# LSPCI ##############" >> $DEBUG_INFO + /sbin/lspci 2> /dev/null >> $DEBUG_INFO + + echo >> $DEBUG_INFO + echo "############# LSPCI -N ##############" >> $DEBUG_INFO + /sbin/lspci -n 2> /dev/null >> $DEBUG_INFO + + echo >> $DEBUG_INFO + echo "############# LSMOD ##############" >> $DEBUG_INFO + /sbin/lsmod >> $DEBUG_INFO + + echo >> $DEBUG_INFO + echo "############# DMESG ##############" >> $DEBUG_INFO + /bin/dmesg >> $DEBUG_INFO + + if [ -r /var/log/messages ]; then + echo >> $DEBUG_INFO + echo "############# Messages ##############" >> $DEBUG_INFO + tail -50 /var/log/messages >> $DEBUG_INFO + fi + + echo >> $DEBUG_INFO + echo "############# Running Processes ##############" >> $DEBUG_INFO + /bin/ps -ef >> $DEBUG_INFO + echo "##############################################" >> $DEBUG_INFO + + echo + echo "Please open an issue in the http://bugs.openfabrics.org and attach $DEBUG_INFO" + echo +} + +ib_set_node_desc() +{ + # Wait while node's hostname is set + NODE_DESC_TIME_BEFORE_UPDATE=${NODE_DESC_TIME_BEFORE_UPDATE:-10} + # Integer countdown bounding the 1-second retries of the "localhost" wait loop below + local -i UPDATE_TIMEOUT=${NODE_DESC_UPDATE_TIMEOUT:-120} + sleep $NODE_DESC_TIME_BEFORE_UPDATE + # Reread NODE_DESC value + . $CONFIG + NODE_DESC=${NODE_DESC:-$(hostname -s)} + while [ "${NODE_DESC}" == "localhost" ] && [ $UPDATE_TIMEOUT -gt 0 ]; do + sleep 1 + . 
$CONFIG + NODE_DESC=${NODE_DESC:-$(hostname -s)} + let UPDATE_TIMEOUT-- + done + # Add node description to sysfs + ibsysdir="/sys/class/infiniband" + if [ -d ${ibsysdir} ]; then + declare -i hca_id=1 + for hca in ${ibsysdir}/* + do + if [ -e ${hca}/node_desc ]; then + log_msg "Set node_desc for $(basename $hca): ${NODE_DESC} HCA-${hca_id}" + echo -n "${NODE_DESC} HCA-${hca_id}" >> ${hca}/node_desc + fi + let hca_id++ + done + fi +} + + +need_location_code_fix() +{ + local sub ARCH KVERSION + ARCH=$(uname -m) + KVERSION=$(uname -r) + + if [ "$ARCH" != "ppc64" ]; then + return 1; + fi + + case $KVERSION in + 2.6.9-*.EL*) + sub=$(echo $KVERSION | cut -d"-" -f2 | cut -d"." -f1) + if [ $sub -lt 62 ]; then + return 2; + fi + ;; + 2.6.16.*-*-*) + sub=$(echo $KVERSION | cut -d"." -f4 | cut -d"-" -f1) + if [ $sub -lt 53 ]; then + return 0; + fi + ;; + 2.6.18-*.el5*) + sub=$(echo $KVERSION | cut -d"-" -f2 | cut -d"." -f1) + if [ $sub -lt 54 ]; then + return 0; + fi + ;; + 2.6.*) + sub=$(echo $KVERSION | cut -d"." -f3 | cut -d"-" -f1 | tr -d [:alpha:][:punct:]) + if [ $sub -lt 24 ]; then + return 0; + fi + ;; + esac + + return 1; +} + +fix_location_codes() +{ + # ppc64 only: + # Fix duplicate location codes on kernels where ibmebus can't handle them + + need_location_code_fix + ret=$? + if [ $ret = 1 ]; then return 0; fi + if ! [ -d /proc/device-tree -a -f /proc/ppc64/ofdt ]; then return 0; fi + + local i=1 phandle lcode len + # output all duplicate location codes and their devices + for attr in $(find /proc/device-tree -name "ibm,loc-code" | grep "lh.a"); do + echo -e $(dirname $attr)"\t"$(cat $attr) + done | sort -k2 | uniq -f1 --all-repeated=separate | cut -f1 | while read dev; do + if [ -n "$dev" ]; then + # append an instance counter to the location code + phandle=$(hexdump -e '8 "%u"' $dev/ibm,phandle) + lcode=$(cat $dev/ibm,loc-code)-I$i + len=$(echo -n "$lcode" | wc -c) + node=${dev#/proc/device-tree} + + # kernel-2.6.9 don't provide "update_property" + if [ ! 
-z "$(echo -n "$node" | grep "lhca")" ]; then + if [ $ret = 2 ]; then + echo -n "add_node $node" > /tmp/addnode + cd $dev + for a in *; do + SIZE=$(stat -c%s $a) + if [ "$a" = "ibm,loc-code" ] ; then + echo -n " $a $len $lcode" >> /tmp/addnode + elif [ "$a" = "interrupts" ] ; then + echo -n " $a 0 " >> /tmp/addnode + else + echo -n " $a $SIZE " >> /tmp/addnode + cat $a >> /tmp/addnode + fi + done + echo -n "remove_node $node" > /proc/ppc64/ofdt + cat /tmp/addnode > /proc/ppc64/ofdt + rm -rf /tmp/addnode + else + echo -n "update_property $phandle ibm,loc-code $len $lcode" > /proc/ppc64/ofdt + fi + i=$(($i + 1)) + fi + else + # empty line means new group -- reset i + i=1 + fi + done +} + +rotate_log() +{ + local log=$1 + if [ -s ${log} ]; then + cat ${log} >> ${log}.$(date +%Y-%m-%d) + /bin/rm -f ${log} + fi + touch ${log} +} + +is_ivyb() +{ + cpu_family=`/usr/bin/lscpu 2>&1 | grep "CPU family" | cut -d':' -f 2 | sed -e 's/ //g'` + cpu_model=`/usr/bin/lscpu 2>&1 | grep "Model:" | cut -d':' -f 2 | sed -e 's/ //g'` + + case "${cpu_family}_${cpu_model}" in + 6_62) + return 0 + ;; + *) + return 1 + ;; + esac +} + +# Check whether IPoIB interface configured to be started upon boot. +is_onboot() +{ + local i=$1 + shift + + case $DISTRIB in + RedHat|Rocks) + if LANG=C egrep -L "^ONBOOT=['\"]?[Nn][Oo]['\"]?" ${NETWORK_CONF_DIR}/ifcfg-$i > /dev/null ; then + return 1 + fi + ;; + SuSE) + if ! LANG=C egrep -L "^STARTMODE=['\"]?onboot['\"]?" ${NETWORK_CONF_DIR}/ifcfg-$i > /dev/null ; then + return 1 + fi + ;; + Debian) + if ! ( LANG=C grep auto /etc/network/interfaces | grep -w $i > /dev/null 2>&1) ; then + return 1 + fi + ;; + *) + if LANG=C egrep -L "^ONBOOT=['\"]?[Nn][Oo]['\"]?" ${NETWORK_CONF_DIR}/ifcfg-$i > /dev/null ; then + return 1 + fi + ;; + esac + + return 0 +} + +set_ipoib_cm() +{ + local i=$1 + shift + + if [ ! 
-e /sys/class/net/${i}/mode ]; then + echo "Failed to configure IPoIB connected mode for ${i}" + return 1 + fi + + sleep 1 + echo connected > /sys/class/net/${i}/mode + /sbin/ip link set ${i} mtu ${IPOIB_MTU} +} + +bring_up() +{ + local i=$1 + shift + + case $DISTRIB in + RedHat|Rocks) + . ${NETWORK_CONF_DIR}/ifcfg-${i} + if [ ! -z ${IPADDR} ] && [ ! -z ${NETMASK} ] && [ ! -z ${BROADCAST} ]; then + /sbin/ifconfig ${i} ${IPADDR} netmask ${NETMASK} broadcast ${BROADCAST} > /dev/null 2>&1 + else + /sbin/ifup ${i} 2> /dev/null + fi + ;; + SuSE) + if [ "$KPREFIX" == "26" ]; then + ifconfig ${i} up > /dev/null 2>&1 + fi + # Workaround for ifup issue: two devices with the same IP address + . ${NETWORK_CONF_DIR}/ifcfg-${i} + if [ ! -z ${IPADDR} ] && [ ! -z ${NETMASK} ] && [ ! -z ${BROADCAST} ]; then + /sbin/ifconfig ${i} ${IPADDR} netmask ${NETMASK} broadcast ${BROADCAST} > /dev/null 2>&1 + else + /sbin/ifup ${i} + fi + # /sbin/ifup ${i} > /dev/null 2>&1 + ;; + Debian) + . ${NETWORK_CONF_DIR}/ifcfg-${i} + /sbin/ip address add ${IPADDR}/${NETMASK} dev ${i} > /dev/null 2>&1 + /sbin/ip link set ${i} up > /dev/null 2>&1 + ;; + *) + /sbin/ifup ${i} 2> /dev/null + ;; + esac + + if [ "X${SET_IPOIB_CM}" == "Xyes" ]; then + set_ipoib_cm ${i} + fi + + return $? +} + +is_active_vf() +{ + # test if we have a ConnectX with VFs + # if not, no need to proceed further. Return 1 (no VFs active) + lspci | grep Mellanox | grep ConnectX | grep Virtual > /dev/null + if [ $? -ne 0 ] ; then + # No VFs activated + return 1 + fi + + # test for virsh + virsh -v > /dev/null 2> /dev/null + if [ $? -ne 0 ] ; then + # No virsh + return 1 + fi + + # test if running virsh by mistake on a guest + virsh sysinfo > /dev/null 2> /dev/null + if [ $? -ne 0 ] ; then + # virsh running on a guest + return 1 + fi + + # find all pci devices using the mlx4_core driver + MLX4_CORE_DEVICES=`for j in \`virsh nodedev-list | grep pci \` ; do + virsh nodedev-dumpxml $j 2> /dev/null| grep mlx4_core > /dev/null + if [ $? 
-eq 0 ] ; then echo $j; fi
+    done`
+
+    # for all devices using mlx4_core, see if any have active VFs
+    ACTIVE_MLX4_VFS=`for k in \`echo $MLX4_CORE_DEVICES\` ; do
+        IFS=$'\n'
+        for f in \`virsh -d 4 nodedev-dumpxml $k | grep "address domain"\` ; do
+            for g in \`virsh list | grep -E "running|paused" | awk '{ print $2 }' \`; do
+                virsh dumpxml $g 2> /dev/null | grep $f | grep "address domain"
+            done
+        done
+    done`
+
+    if [ "x$ACTIVE_MLX4_VFS" = "x" ] ; then
+        # NO GUESTS
+        return 1
+    else
+        # There are active virtual functions
+        return 0
+    fi
+}
+
+# start - load the configured InfiniBand/RDMA driver stack.
+#
+# Refuses to run (exit 1) while is_active_vf reports active virtual functions.
+# Calls stop first when a loaded module's srcversion differs from the one
+# modinfo reports, or when a module parameter under /sys/module differs from
+# the value configured in /etc/modprobe.d/*.conf.
+# Then loads every driver whose *_LOAD variable is "yes", the access-layer
+# modules, the IPoIB interfaces (when IPOIB is 1) and $POST_LOAD_MODULES.
+# Exits 1 when the accumulated module-load status is non-zero; otherwise
+# returns the last value stored in RC.
+start()
+{
+    local RC=0
+
+    if is_active_vf; then
+        echo "There are active virtual functions. Cannot continue..."
+        exit 1
+    fi
+
+    # W/A: inbox drivers are loaded at boot instead of new ones
+    local loaded_modules=$(/sbin/lsmod 2>/dev/null | grep -E '^be2net|^cxgb|^mlx|^iw_nes|^i40iw|^iw_cxgb|^ib_qib|^ib_mthca|^ocrdma|^bnxt_re|^ib_ipoib|^ib_srp|^ib_iser|^ib_uverbs|^ib_addr|^ib_mad|^ib_sa|^iw_cm|^ib_core|^ib_ucm|^ib_cm|^rdma_ucm|^ib_umad|^rdma_cm|^compat' | awk '{print $1}')
+    for loaded_module in $loaded_modules
+    do
+        # Compare the srcversion of the running module with the one on disk
+        local loaded_srcver=$(/bin/cat /sys/module/$loaded_module/srcversion 2>/dev/null)
+        local curr_srcver=$(/sbin/modinfo $loaded_module 2>/dev/null | grep srcversion | awk '{print $NF}')
+        if [ "X$loaded_srcver" != "X$curr_srcver" ]; then
+            log_msg "start(): Detected loaded old version of module '$loaded_module', calling stop..."
+            stop
+            break
+        fi
+    done
+
+    # W/A: modules loaded from initrd without taking new params from /etc/modprobe.d/
+    local conf_files=$(grep -rE "options.*mlx" /etc/modprobe.d/*.conf 2>/dev/null | grep -v ":#" | cut -d":" -f"1" | uniq)
+    local goFlag=1
+    if [ "X$conf_files" != "X" ]; then
+        for file in $conf_files
+        do
+            while read line && [ $goFlag -eq 1 ]
+            do
+                # The first word after "options" is the module name. The
+                # pipeline carries a single line, so field 1 is what is meant.
+                local curr_mod=$(echo $line | sed -r -e 's/.*options //g' | awk '{print $1}')
+                if ! is_module $curr_mod; then
+                    continue
+                fi
+                # Each remaining word is a param=value pair
+                for item in $(echo $line | sed -r -e "s/.*options\s*${curr_mod}//g")
+                do
+                    local param=${item%=*}
+                    local conf_value=${item##*=}
+                    local real_value=$(cat /sys/module/${curr_mod}/parameters/${param} 2>/dev/null)
+                    if [ "X$conf_value" != "X$real_value" ]; then
+                        log_msg "start(): Detected '$curr_mod' loaded with '$param=$real_value' instead of '$param=$conf_value' as configured in '$file', calling stop..."
+                        goFlag=0
+                        stop
+                        break
+                    fi
+                done
+            done < $file
+            if [ $goFlag -ne 1 ]; then
+                break
+            fi
+        done
+    fi
+
+    if is_ivyb; then
+        # Clear SB registers on IvyB machines
+        ivyb_slots=`/sbin/lspci -n | grep -w '8086:0e28' | cut -d ' ' -f 1`
+        for ivyb_slot in $ivyb_slots
+        do
+            if [ "0x`/sbin/setpci -s $ivyb_slot 0x858.W`" == "0x0000" ]; then
+                setpci -s $ivyb_slot 0x858.W=0xffff
+            fi
+            if [ "0x`/sbin/setpci -s $ivyb_slot 0x85C.W`" == "0x0000" ]; then
+                setpci -s $ivyb_slot 0x85C.W=0xffff
+            fi
+        done
+    fi
+
+    # Quoted so that an empty DISTRIB does not turn into a test syntax error
+    if [ "$DISTRIB" = "SuSE" ]; then
+        if [ -x /sbin/rpc.statd ]; then
+            /sbin/rpc.statd
+        fi
+    fi
+
+    # Load Mellanox HCA driver
+    if [ "X${MTHCA_LOAD}" == "Xyes" ]; then
+        load_module ib_mthca
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading Mellanox HCA driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    if [ "X${MLX4_LOAD}" == "Xyes" ]; then
+        load_module mlx4_core
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading Mellanox MLX4 HCA driver: "
+        else
+            # Set port configuration
+            if [ -f /etc/infiniband/connectx.conf ]; then
+                . /etc/infiniband/connectx.conf > /dev/null 2>&1
+            fi
+        fi
+        # Only the status of mlx4_ib is added to RC
+        load_module mlx4_ib
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading Mellanox MLX4_IB HCA driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    if [ "X${MLX4_EN_LOAD}" == "Xyes" ]; then
+        if ! is_module mlx4_core; then
+            load_module mlx4_core
+        fi
+
+        load_module mlx4_en
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading Mellanox MLX4_EN HCA driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    if [ "X${MLX5_LOAD}" == "Xyes" ]; then
+        load_module mlx5_core
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading Mellanox MLX5 HCA driver: "
+        fi
+        # Only the status of mlx5_ib is added to RC
+        load_module mlx5_ib
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading Mellanox MLX5_IB HCA driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load QLogic QIB driver
+    if [ "X${QIB_LOAD}" == "Xyes" ]; then
+        load_module ib_qib
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading QLogic QIB driver: "
+        elif [ -x ${_truescale} ]; then
+            ${_truescale} start
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load QLogic InfiniPath driver
+    if [ "X${IPATH_LOAD}" == "Xyes" ]; then
+        load_module ib_ipath
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading QLogic InfiniPath driver: "
+        fi
+        # Don't exit on error
+        # Workaround for Bug 252.
+        # RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load eHCA driver
+    if [ "X${EHCA_LOAD}" == "Xyes" ]; then
+        fix_location_codes
+        load_module ib_ehca
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading eHCA driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load iw_cxgb3 driver
+    if [ "X${CXGB3_LOAD}" == "Xyes" ]; then
+        fix_location_codes
+        load_module iw_cxgb3
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading cxgb3 driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load iw_cxgb4 driver
+    if [ "X${CXGB4_LOAD}" == "Xyes" ]; then
+        fix_location_codes
+        load_module iw_cxgb4
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading cxgb4 driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load iw_nes driver
+    if [ "X${NES_LOAD}" == "Xyes" ]; then
+        fix_location_codes
+        load_module iw_nes
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading nes driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load i40iw driver
+    if [ "X${I40IW_LOAD}" == "Xyes" ]; then
+        fix_location_codes
+        load_module i40iw
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading i40iw driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load Broadcom bnxt_re driver
+    # NOTE(review): only the status of the bnxt_en load is captured below; the
+    # status of the bnxt_re load is discarded. Confirm this is intended.
+    if [ "X${BNXT_RE_LOAD}" == "Xyes" ]; then
+        load_module bnxt_re
+        load_module bnxt_en
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading Broadcom Netxtreme driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load Emulex One Connect driver
+    if [ "X${OCRDMA_LOAD}" == "Xyes" ]; then
+        load_module ocrdma
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading Emulex One Connect driver: "
+        elif [ -x ${_truescale} ]; then
+            ${_truescale} start
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    # Load VMware Paravirtual RDMA driver
+    if [ "X${VMW_PVRDMA_LOAD}" == "Xyes" ]; then
+        load_module vmw_pvrdma
+        my_rc=$?
+        if [ $my_rc -ne 0 ]; then
+            echo_failure $"Loading VMware Paravirtual RDMA driver: "
+        fi
+        RC=$[ $RC + $my_rc ]
+    fi
+
+    ib_set_node_desc > /dev/null 2>&1 &
+
+    load_module ib_umad
+    RC=$[ $RC + $? ]
+    load_module ib_uverbs
+    RC=$[ $RC + $? ]
+
+    if [ $IPOIB -eq 1 ]; then
+        load_module ib_ipoib
+        RC=$[ $RC + $? ]
+    fi
+
+    # Any accumulated load failure aborts the whole start
+    if [ $RC -eq 0 ]; then
+        echo_success $"Loading HCA driver and Access Layer: "
+    else
+        echo_failure $"Loading HCA driver and Access Layer: "
+        get_debug_info
+        exit 1
+    fi
+
+    # Enable IPoIB Interface if configured
+    if [ $IPOIB -eq 1 ]; then
+        get_interfaces
+        echo Setting up InfiniBand network interfaces:
+        for i in $interfaces
+        do
+            if [[ ! -e ${WD}/ifcfg-${i} && ! -e ${NETWORK_CONF_DIR}/ifcfg-${i} ]]; then
+                echo "No configuration found for ${i}"
+                if [ "X${SET_IPOIB_CM}" == "Xyes" ]; then
+                    set_ipoib_cm ${i}
+                fi
+            else
+                # Temporarily link the config from $WD when the network
+                # configuration directory has none; removed again below.
+                REMOVE_NETWORK_CONF=0
+                if [ ! -e ${NETWORK_CONF_DIR}/ifcfg-${i} ]; then
+                    ln -snf ${WD}/ifcfg-${i} ${NETWORK_CONF_DIR}/ifcfg-${i}
+                    REMOVE_NETWORK_CONF=1
+                fi
+
+                if [ "$RUNMODE" != "manual" ]; then
+                    if ! is_onboot $i; then
+                        continue
+                    fi
+                fi
+
+                bring_up $i
+                RC=$?
+
+                unset IPADDR NETMASK BROADCAST
+
+                if [ $REMOVE_NETWORK_CONF -eq 1 ]; then
+                    rm -f ${NETWORK_CONF_DIR}/ifcfg-${i}
+                fi
+
+                if [ $RC -eq 0 ]; then
+                    echo_success $"Bringing up interface $i:"
+                else
+                    echo_failure $"Bringing up interface $i:"
+                fi
+            fi
+
+            # Bring up child interfaces if configured
+            for child_conf in $(/bin/ls -1 ${NETWORK_CONF_DIR}/ifcfg-${i}.???? 2> /dev/null)
+            do
+                ch_i=${child_conf##*-}
+                # Skip saved interfaces rpmsave and rpmnew
+                if (echo $ch_i | grep rpm > /dev/null 2>&1); then
+                    continue
+                fi
+                if [ "$RUNMODE" != "manual" ]; then
+                    if ! is_onboot $ch_i; then
+                        continue
+                    fi
+                fi
+
+                if [ ! -f /sys/class/net/${i}/create_child ]; then
+                    continue
+                fi
+
+                # The suffix after the last dot of the child name is written,
+                # prefixed with 0x, to the parent's create_child file.
+                pkey=0x${ch_i##*.}
+                if [ ! -e /sys/class/net/${i}.${ch_i##*.} ] ; then
+                    echo $pkey > /sys/class/net/${i}/create_child
+                fi
+                bring_up $ch_i
+                RC=$?
+
+                unset IPADDR NETMASK BROADCAST
+                if [ $RC -eq 0 ]; then
+                    echo_success $"Bringing up interface $ch_i:"
+                else
+                    echo_failure $"Bringing up interface $ch_i:"
+                fi
+            done
+        done
+        echo_done "Setting up service network . . ."
+
+    fi
+
+    # Load configured modules
+    if [ "$POST_LOAD_MODULES" != "" ]; then
+        for mod in $POST_LOAD_MODULES
+        do
+            case $mod in
+                ib_srp)
+                    load_module $mod
+                    if [ "X${SRPHA_ENABLE}" == "Xyes" ]; then
+                        if [ ! -x /sbin/multipath ]; then
+                            echo "/sbin/multipath is required to enable SRP HA."
+                        else
+                            # Create 91-srp.rules file
+                            mkdir -p /etc/udev/rules.d
+                            if [ "$DISTRIB" == "SuSE" ]; then
+                                cat > /etc/udev/rules.d/91-srp.rules << EOF
+ACTION=="add", KERNEL=="sd*[!0-9]", RUN+="/sbin/multipath %M:%m"
+EOF
+                            fi
+                            ${modprobe} dm_multipath > /dev/null 2>&1
+                            srp_daemon.sh &
+                            srp_daemon_pid=$!
+                            echo ${srp_daemon_pid} > ${srp_daemon_pidfile}
+                        fi
+                    elif [ "X${SRP_DAEMON_ENABLE}" == "Xyes" ]; then
+                        srp_daemon.sh &
+                        srp_daemon_pid=$!
+                        echo ${srp_daemon_pid} > ${srp_daemon_pidfile}
+                    fi
+                ;;
+                *)
+                    load_module $mod
+                ;;
+            esac
+            RC=$?
+            [ $RC -ne 0 ] && echo_failure "Loading $mod"
+        done
+    fi
+
+    # Create devices using udev
+    if [ -x /sbin/udevstart ]; then
+        UDEVSTART=/sbin/udevstart
+    elif [ -x /sbin/start_udev ]; then
+        UDEVSTART=/sbin/start_udev
+    else
+        UDEVSTART=
+    fi
+
+    if [ ! -z "${UDEVSTART}" ]; then
+        # Wait up to devstart_maxcnt seconds for /dev/infiniband to appear
+        devstart_cnt=0
+        devstart_maxcnt=10
+        while [ ! -d /dev/infiniband/ ] && [ $devstart_cnt -lt $devstart_maxcnt ]; do
+            sleep 1
+            let devstart_cnt++
+        done
+
+        if [ ! -d /dev/infiniband/ ] && [ $devstart_cnt -eq $devstart_maxcnt ]; then
+            ${UDEVSTART} > /dev/null 2>&1
+        fi
+
+        if [ ! -d /dev/infiniband/ ]; then
+            echo_warning $"udevstart: No devices created under /dev/infiniband"
+        fi
+    fi
+
+    # Create qlgc_vnic interfaces. This needs to be done after udevstart
+    if [ "X${QLGC_VNIC_LOAD}" == "Xyes" ]; then
+        if [ -x /etc/init.d/qlgc_vnic ]; then
+            /etc/init.d/qlgc_vnic start
+        fi
+    fi
+
+    # Quoted like the other X-prefixed checks in this function
+    if [ "X${RENICE_IB_MAD}" == "Xyes" ]; then
+        # Set max_ports_num_in_hca variable
+        count_ib_ports
+        ports_num=$?
+        list_of_ibmads=""
+        for (( i=1 ; $i <= ${max_ports_num_in_hca} ; i++ ))
+        do
+            list_of_ibmads="${list_of_ibmads} ib_mad${i}"
+        done
+
+        ib_mad_pids=($(pidof ${list_of_ibmads} 2> /dev/null))
+        num_of_root_ibmad_procs=$(/bin/ps h -o user -p ${ib_mad_pids[*]} | grep -w root | wc -l)
+        get_pid_retries=0
+        while [ ${num_of_root_ibmad_procs} -lt $ports_num ]
+        do
+            # Wait maximum for 5 sec to get ib_mad process pid
+            if [ $get_pid_retries -gt 10 ]; then
+                echo Failed to get $ports_num ib_mad PIDs to renice. Got ${num_of_root_ibmad_procs}.
+                break
+            fi
+            usleep 500000
+            ib_mad_pids=($(pidof ${list_of_ibmads} 2> /dev/null))
+            num_of_root_ibmad_procs=$(/bin/ps h -o user -p ${ib_mad_pids[*]} | grep -w root | wc -l)
+            let get_pid_retries++
+        done
+        for ib_mad_pid in ${ib_mad_pids[*]}
+        do
+            if [ "$(/bin/ps -p ${ib_mad_pid} h -o user 2> /dev/null)" == "root" ]; then
+                renice -19 ${ib_mad_pid} > /dev/null 2>&1
+            fi
+        done
+    fi
+
+    if [ -x /sbin/sysctl_perf_tuning ] && [ "X${RUN_SYSCTL}" == "Xyes" ]; then
+        /sbin/sysctl_perf_tuning load
+    fi
+
+    return $RC
+}
+
+# Number of one-second retries unload_rec may spend on mlx4_core in auto mode
+UNLOAD_REC_TIMEOUT=100
+# unload_rec - remove module $1 with "modprobe -r".
+#
+# When that fails, every module rmmod reports as a user of $1 is handled:
+# users listed in $UNLOAD_MODULES are unloaded recursively, any other user
+# leads to rm_mod on $1 (which exits on failure).
+# If $1 is still loaded afterwards, mlx4_core in "auto" RUNMODE is retried
+# after a one-second sleep while UNLOAD_REC_TIMEOUT is positive; every other
+# case goes to rm_mod.
+unload_rec()
+{
+    local mod=$1
+    shift
+
+    if is_module $mod ; then
+        ${modprobe} -r $mod >/dev/null 2>&1
+        if [ $? -ne 0 ];then
+            for dep in `/sbin/rmmod $mod 2>&1 | grep "is in use by" | sed -r -e 's/.*use by //g' | sed -e 's/,/ /g'`
+            do
+                # if $dep was not loaded by openibd, don't unload it; fail with error.
+                if ! `echo $UNLOAD_MODULES | grep -q $dep` ; then
+                    rm_mod $mod
+                else
+                    unload_rec $dep
+                fi
+            done
+        fi
+        if is_module $mod ; then
+            if [ "X$RUNMODE" == "Xauto" ] && [ "X$mod" == "Xmlx4_core" ] && [ $UNLOAD_REC_TIMEOUT -gt 0 ]; then
+                let UNLOAD_REC_TIMEOUT--
+                sleep 1
+                unload_rec $mod
+            else
+                rm_mod $mod
+            fi
+        fi
+    fi
+}
+
+# rm_mod - remove module $1 with rmmod.
+#
+# On failure prints the failure banner and the rmmod output, then exits the
+# script with status 1.
+rm_mod()
+{
+    local mod=$1
+    shift
+
+    unload_log=`/sbin/rmmod $mod 2>&1`
+    if [ $? -ne 0 ]; then
+        echo_failure $"Unloading $mod"
+        if [ ! -z "${unload_log}" ]; then
+            echo $unload_log
+        fi
+        # get_debug_info
+        # NOTE(review): after the shift above, $2 is the caller's third
+        # argument; the callers visible here pass only the module name.
+        [ ! -z "$2" ] && echo $2
+        exit 1
+    fi
+}
+
+# unload - unload module $1 if it is loaded, with per-driver handling.
+unload()
+{
+    # Unload module $1
+    local mod=$1
+    local unload_log
+
+    if is_module $mod; then
+        case $mod in
+            ib_ipath)
+                # infinipath depends on modprobe.conf remove rule
+                unload_rec $mod
+                sleep 2
+            ;;
+            ib_qib)
+                if [ -x ${_truescale} ]; then
+                    ${_truescale} stop
+                fi
+
+                if [ -d /ipathfs ]; then
+                    umount /ipathfs
+                    rmdir /ipathfs
+                fi
+
+                unload_rec $mod
+                sleep 2
+            ;;
+            ib_mthca | mlx4_ib | mlx5_ib | ib_ehca | iw_cxgb3 | iw_cxgb4 | iw_nes | i40iw)
+                unload_rec $mod
+                sleep 2
+            ;;
+            *)
+                unload_rec $mod
+                if [ $? -ne 0 ] || is_module $mod; then
+                    # Try rmmod if modprobe failed: case that previous installation included more IB modules.
+                    unload_rec $mod
+                fi
+            ;;
+        esac
+    fi
+}
+
+# stop - unload the InfiniBand/RDMA driver stack.
+#
+# Exits 1 without unloading anything when ko2iblnd is listed in /proc/modules,
+# when virtual functions are active, when one of the known InfiniBand daemons
+# or any process holding /dev/infiniband is running, when iSER sessions are
+# open, or when block devices behind SRP hosts have holders.
+# Otherwise removes IPoIB bonds, stops the helper daemons, unloads
+# $UNLOAD_MODULES and mlx4_core, and removes /dev/infiniband.
+stop()
+{
+
+    # Check if Lustre is loaded
+    if ( grep -q "ko2iblnd" /proc/modules ); then
+        echo
+        echo "Please stop Lustre services before unloading the"
+        echo "Infiniband stack."
+        echo
+        exit 1
+    fi
+
+    if is_active_vf; then
+        echo "There are active virtual functions. Cannot continue..."
+        exit 1
+    fi
+
+    # Check if applications which use infiniband are running
+    local apps="opensm osmtest ibbs ibns ibacm iwpmd"
+    local pid
+
+    for app in $apps
+    do
+        if ( /usr/bin/pgrep $app > /dev/null 2>&1 ); then
+            echo
+            echo "Please stop \"$app\" and all applications running over InfiniBand"
+            echo "Then run \"$0 $ACTION\""
+            echo
+            exit 1
+        fi
+    done
+
+    # Lookup for remaining applications using infiniband devices
+    local entries
+
+    if [ -d /dev/infiniband ]; then
+        entries=$(lsof +c 0 +d /dev/infiniband 2>/dev/null | grep -v "^COMMAND" | \
+            awk '{print $1 " " $2 " " $3 " " $NF}' | sort -u)
+    fi
+
+    if [ -n "$entries" ]; then
+
+        echo "Please stop the following applications still using Infiniband devices:"
+
+        # Each entry is "command pid user device-path"
+        while IFS= read -r entry; do
+            app=$(echo "$entry" | cut -f1 -d' ')
+            pid=$(echo "$entry" | cut -f2 -d' ')
+            owner=$(echo "$entry" | cut -f3 -d' ')
+            device=$(echo "$entry" | cut -f4 -d' ' | awk -F/ '{print $NF}')
+
+            echo "$app($pid) user $owner is using device $device"
+        done <<< "$entries"
+
+        echo
+        echo "Then run \"$0 $ACTION\""
+
+        exit 1
+    fi
+
+    # W/A for http://bugs.openfabrics.org/bugzilla/show_bug.cgi?id=2259
+    for bond in $(cat /sys/class/net/bonding_masters 2> /dev/null) ; do
+        if_type=$(cat /sys/class/net/$bond/type 2> /dev/null)
+        # Default to 0 so an unreadable type file skips the bond instead of
+        # producing a test error
+        if [ "${if_type:-0}" -eq 32 ] ; then
+            for slave in $(cat /sys/class/net/$bond/bonding/slaves 2> /dev/null) ; do
+                echo -$slave > /sys/class/net/$bond/bonding/slaves
+            done
+            echo -$bond > /sys/class/net/bonding_masters
+        fi
+    done
+
+    # Check if open-iscsi is running and if there are open iSER sessions
+    if [ $(pidof iscsid | wc -w) -gt 0 ]; then
+        iser_session_cnt=$(iscsiadm -m session 2>&1 | grep -c "^iser")
+
+        if [ $iser_session_cnt -gt 0 ]; then
+            echo
+            # If it's RH4, open-iscsi must be stopped before openibd
+            if [[ -f /etc/redhat-release && $(grep -c "Red Hat Enterprise Linux AS release 4" /etc/redhat-release) -eq 1 ]]; then
+                echo "Please stop open-iscsi: /etc/init.d/iscsi stop"
+            else
+                echo "Please logout from all open-iscsi over iSER sessions"
+            fi
+            echo "Then run \"$0 $ACTION\""
+            echo
+            exit 1
+        fi
+    fi
+
+    # Check for any multipath devices running over SRP devices
+    if is_module ib_srp; then
+        for f in `/bin/ls /sys/class/scsi_host`; do
+            if [ -f /sys/class/scsi_host/$f/local_ib_port ]; then
+                for i in `/bin/ls /sys/class/scsi_host/$f/device/target*/*/block* | awk -F: '{print $NF}'`
+                do
+                    holders=`ls /sys/block/$i/holders 2> /dev/null`
+                    if [ -n "$holders" ]; then
+                        echo "Please flush multipath devices running over SRP devices"
+                        echo
+                        exit 1
+                    fi
+                done
+            fi
+        done
+    fi
+    # Stop IPoIB HA daemon if running
+    if [ -f $ipoib_ha_pidfile ]; then
+        local line p
+        read line < $ipoib_ha_pidfile
+        for p in $line ; do
+            # Keep only purely numeric entries that have a /proc directory
+            [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && ipoib_ha_pids="$ipoib_ha_pids $p"
+        done
+        /bin/rm -f $ipoib_ha_pidfile
+    fi
+
+    if [ -n "${ipoib_ha_pids:-}" ]; then
+        kill -9 ${ipoib_ha_pids} > /dev/null 2>&1
+        mcastpid=$(pidof -x mcasthandle)
+        if [ -n "${mcastpid:-}" ]; then
+            kill -9 ${mcastpid} > /dev/null 2>&1
+        fi
+    fi
+
+    # Stop SRP HA daemon if running
+    if [ -f $srp_daemon_pidfile ]; then
+        local line p
+        read line < $srp_daemon_pidfile
+        for p in $line ; do
+            # Keep only purely numeric entries that have a /proc directory
+            [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && srp_daemon_pids="$srp_daemon_pids $p"
+        done
+        /bin/rm -f $srp_daemon_pidfile
+    fi
+
+    if [ -n "${srp_daemon_pids:-}" ]; then
+        kill -15 ${srp_daemon_pids} > /dev/null 2>&1
+    fi
+
+    if [ "X${SRPHA_ENABLE}" == "Xyes" ]; then
+        /bin/rm -f /etc/udev/rules.d/91-srp.rules > /dev/null 2>&1
+        mpath_pids=$(pidof -x multipath)
+        if [ -n "${mpath_pids:-}" ]; then
+            kill -9 ${mpath_pids} > /dev/null 2>&1
+        fi
+
+        if is_module ib_srp; then
+            for f in `/bin/ls /sys/class/scsi_host`
+            do
+                if [ -f /sys/class/scsi_host/$f/local_ib_port ]; then
+                    for i in `/bin/ls -d /sys/class/scsi_host/$f/device/target*/*/block* | awk -F: '{print $NF}'`
+                    do
+                        mdev=`/sbin/scsi_id -g -s /block/$i 2> /dev/null`
+                        if [ -n "${mdev}" ]; then
+                            /sbin/multipath -f $mdev > /dev/null 2>&1
+                        fi
+                    done
+                fi
+            done
+        fi
+    fi
+
+    if [ -d /sys/class/infiniband_qlgc_vnic/ ]; then
+        if [ -x /etc/init.d/qlgc_vnic ]; then
+            /etc/init.d/qlgc_vnic stop 2>&1 1>/dev/null
+        fi
+    fi
+
+    # Unload modules
+    if [ "$UNLOAD_MODULES" != "" ]; then
+        for mod in $UNLOAD_MODULES
+        do
+            unload $mod
+        done
+    fi
+
+    # Unload mlx4_core
+    if is_module mlx4_core; then
+        is_ref mlx4_core
+        if [ $? -eq 0 ]; then
+            unload mlx4_core
+        elif is_module mlx4_en; then
+            # Unload mlx4_en if one or more of the following cases takes place:
+            # - No MLX4 eth devices present
+            # - mlx4_en module was not loaded by the openibd script
+            if (grep 0x15b3 /sys/class/net/eth*/device/vendor > /dev/null 2>&1) && [ "X$MLX4_EN_LOAD" != "Xyes" ]; then
+                echo "MLX4_EN module is loaded and in use."
+                echo "To unload MLX4_EN run: 'modprobe -r mlx4_en mlx4_core'"
+            else
+                unload mlx4_en
+                unload mlx4_core
+            fi
+        fi
+    fi
+
+    if [ -x /sbin/sysctl_perf_tuning ] && [ "X${RUN_SYSCTL}" == "Xyes" ]; then
+        /sbin/sysctl_perf_tuning unload
+    fi
+
+    /bin/rm -rf /dev/infiniband
+    echo_success $"Unloading HCA driver: "
+    sleep 1
+}
+
+# status - report whether an HCA driver is loaded, list the configured and
+# currently UP IPoIB and MLX4_EN interfaces, and list the loaded modules from
+# $STATUS_MODULES. Returns RC.
+status()
+{
+    local RC=0
+
+    if is_module ib_mthca || is_module mlx4_core || is_module mlx5_core || is_module ib_qib || is_module ib_ipath || is_module ib_ehca || is_module iw_cxgb3 || is_module iw_cxgb4 || is_module iw_nes || is_module i40iw; then
+        echo
+        echo " HCA driver loaded"
+        echo
+    else
+        echo
+        echo $"HCA driver is not loaded"
+        echo
+    fi
+
+    if is_module ib_ipoib; then
+        get_interfaces
+        if [ -n "$interfaces" ]; then
+            echo $"Configured IPoIB devices:"
+            echo $interfaces
+            echo
+            echo $"Currently active IPoIB devices:"
+
+            for i in $interfaces
+            do
+                if [[ ! -e ${NETWORK_CONF_DIR}/ifcfg-${i} ]]; then
+                    continue
+                fi
+                echo `/sbin/ip -o link show $i | awk -F ": " '/UP>/ { print $2 }'`
+                # NOTE(review): this captures the exit status of echo, not of
+                # the ip/awk pipeline.
+                RC=$?
+            done
+        fi
+    fi
+
+    if is_module mlx4_en; then
+        get_mlx4_en_interfaces
+        if [ -n "$mlx4_en_interfaces" ]; then
+            echo $"Configured MLX4_EN devices:"
+            echo $mlx4_en_interfaces
+            echo
+            echo $"Currently active MLX4_EN devices:"
+
+            for i in $mlx4_en_interfaces
+            do
+                echo `/sbin/ip -o link show $i | awk -F ": " '/UP>/ { print $2 }'`
+            done
+        fi
+    fi
+
+    echo
+
+    local cnt=0
+
+    for mod in $STATUS_MODULES
+    do
+        if is_module $mod; then
+            # Print the heading once, before the first loaded module
+            [ $cnt -eq 0 ] && echo "The following OFED modules are loaded:" && echo
+            let cnt++
+            echo " $mod"
+        fi
+    done
+
+    echo
+
+    return $RC
+}
+
+
+RC=0
+start_time=$(date +%s | tr -d '[:space:]')
+
+# trap_handler - print a notice instead of terminating when a trapped signal
+# arrives; the message depends on how long the script has been running.
+trap_handler()
+{
+    let run_time=$(date +%s | tr -d '[:space:]')-${start_time}
+
+    # Ask to wait for 5 seconds if trying to stop openibd
+    if [ $run_time -gt 5 ] && [ "$ACTION" == "stop" ]; then
+        printf "\nProbably some application are still using InfiniBand modules...\n"
+    else
+        printf "\nPlease wait ...\n"
+    fi
+    return 0
+}
+
+# SIGINT and SIGTERM only: SIGKILL (9) cannot be trapped, so it is not listed
+trap 'trap_handler' 2 15
+
+case $ACTION in
+    start)
+        start
+    ;;
+    stop)
+        stop
+    ;;
+    restart)
+        stop
+        start
+    ;;
+    status)
+        status
+    ;;
+    *)
+        echo
+        echo "Usage: `basename $0` {start|stop|restart|status}"
+        echo
+        exit 1
+    ;;
+esac
+
+RC=$?
+exit $RC
diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/openibd.service b/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/openibd.service
new file mode 100644
index 0000000..d71e899
--- /dev/null
+++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/infiniband/openibd.service
@@ -0,0 +1,22 @@
+[Unit]
+SourcePath=/etc/init.d/openibd
+Description=LSB: Activates/Deactivates InfiniBand Driver to start at boot time.
+Before=runlevel2.target runlevel3.target runlevel5.target shutdown.target
+After=local-fs.target network.target network-online.target
+Conflicts=shutdown.target
+
+[Service]
+Type=forking
+Restart=no
+TimeoutSec=5min
+IgnoreSIGPIPE=no
+KillMode=process
+GuessMainPID=no
+RemainAfterExit=yes
+SysVStartPriority=1
+ExecStart=/etc/init.d/openibd start
+ExecStop=/etc/init.d/openibd stop
+
+[Install]
+WantedBy=multi-user.target
+WantedBy=network-online.target
diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/kexec/kexec b/grid5000/steps/data/setup/puppet/modules/env/files/base/kexec/kexec
new file mode 100644
index 0000000..5a7e9db
--- /dev/null
+++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/kexec/kexec
@@ -0,0 +1,13 @@
+# Defaults for kexec initscript
+# sourced by /etc/init.d/kexec and /etc/init.d/kexec-load
+
+# Load a kexec kernel (true/false)
+LOAD_KEXEC=false
+
+# Kernel and initrd image
+KERNEL_IMAGE="/vmlinuz"
+INITRD="/initrd.img"
+
+# If empty, use current /proc/cmdline
+APPEND=""
+
diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/mx/ip_over_mx b/grid5000/steps/data/setup/puppet/modules/env/files/base/mx/ip_over_mx
new file mode 100644
index 0000000..d9f6ceb
--- /dev/null
+++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/mx/ip_over_mx
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+set -e  # stop at the first failing command
+
+if [ "$IFACE" != "myri0" ]; then  # nothing to do for any interface other than myri0
+    exit 0
+fi
+
+SHORTNAME=$(hostname -s)  # short host name, used below to build the "<host>-<iface>" name
+
+/etc/init.d/mx start  # start the mx init script before configuring the interface
+
+/sbin/ifconfig "$IFACE" $(gethostip -d "$SHORTNAME-$IFACE") netmask 255.255.240.0 up  # address is whatever gethostip prints for "<host>-<iface>" (presumably its IPv4 address - TODO confirm)
diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/ndctl/ndctl.preset b/grid5000/steps/data/setup/puppet/modules/env/files/base/ndctl/ndctl.preset
new file mode 100644
index 0000000..d487ae7
--- /dev/null
+++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/ndctl/ndctl.preset
@@ -0,0 +1 @@
+disable ndctl-monitor.service
diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/sshfs/40-fuse.rules b/grid5000/steps/data/setup/puppet/modules/env/files/base/sshfs/40-fuse.rules
new file mode 100644
index 0000000..9585111
--- /dev/null
+++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/sshfs/40-fuse.rules
@@ -0,0 +1 @@
+KERNEL=="fuse", MODE="0666"
diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/tuning/limits-grid5000.conf b/grid5000/steps/data/setup/puppet/modules/env/files/base/tuning/limits-grid5000.conf
new file mode 100644
index 0000000..9483bec
--- /dev/null
+++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/tuning/limits-grid5000.conf
@@ -0,0 +1,5 @@
+# Grid 5000
+# Needed for openmpi
+* hard memlock unlimited
+* soft memlock unlimited
+
diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/tuning/sysctl-00-grid5000.conf b/grid5000/steps/data/setup/puppet/modules/env/files/base/tuning/sysctl-00-grid5000.conf
new file mode 100644
index 0000000..ff44ed6
--- /dev/null
+++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/tuning/sysctl-00-grid5000.conf
@@ -0,0 +1,6 @@
+#
+# Grid'5000 Tuning
+net.ipv4.tcp_rmem=4096 87380 67108864
+net.ipv4.tcp_wmem=4096 16384 67108864
+net.core.rmem_max = 4194304
+net.core.wmem_max = 4194304
diff --git a/grid5000/steps/data/setup/puppet/modules/env/files/base/userns/sysctl-00-userns.conf b/grid5000/steps/data/setup/puppet/modules/env/files/base/userns/sysctl-00-userns.conf
new file mode 100644
index 0000000..575f6aa
--- /dev/null
+++ b/grid5000/steps/data/setup/puppet/modules/env/files/base/userns/sysctl-00-userns.conf
@@ -0,0 +1,2 @@
+# Required for Nix
+kernel.unprivileged_userns_clone=1 |