function net_init_failed() {
echo
- echo $(date "+%H:%M:%S") " pl_netinit: network initialization failed,"
+ echo $(date "+%H:%M:%S") " pl_netinit: network initialization failed with interface $ETH_DEVICE"
+ echo
+ echo For forensics
+ echo
+ echo ========== lspci beg
+ /sbin/lspci -n | /bin/grep "Class 0200"
+ echo ========== lspci end
+ echo
+ echo ========== ifconfig beg
+ /sbin/ifconfig
+ echo ========== ifconfig end
+ echo ========== ip address show beg
+ ip address show
+ echo ========== ip address show end
+ echo
+ echo $(date "+%H:%M:%S") " pl_netinit: network initialization failed with interface $ETH_DEVICE"
echo $(date "+%H:%M:%S") " pl_netinit: shutting down machine in two hours"
/bin/sleep 2h
/sbin/shutdown -h now
if [[ -n "$NET_DEVICE" ]]; then
# the user specified a mac address we should use. find the network
# device for it.
- NET_DEVICE=$(tr A-Z a-z <<<$NET_DEVICE)
+ NET_DEVICE=$(tr A-Z a-z <<< $NET_DEVICE)
pushd /sys/class/net
for device in *; do
fi
# within a systemd-driven startup, we often see this stage
-# triggered before the network is actually up
+# triggered before the network interface is actually exposed
+# by udev/kernel
# although of course we have network-online.target
# as a requirement; go figure what systemd actually does..
# in any case, let us try to work around that by allowing some delay
# here
-TIMEOUT=30
+# tmp: Thierry June 2015
+# on fedora 21 nodes we see this running in a context where eth0 is not known to the system
+# could be related to a dependency that we poorly describe to systemd
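+# I am increasing this timeout to 2 minutes in order to check that conjecture
+# NOTE(review): ALLOW below is 60 iterations with a 1s sleep each, not 120 - confirm the intended value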
+ALLOW=60  # maximum number of wait iterations; the loop below does one "sleep 1" per iteration
+COUNTER=0  # number of iterations spent waiting so far
while true; do
if /sbin/ifconfig $ETH_DEVICE >& /dev/null; then
- echo Device $ETH_DEVICE found - proceeding - timeout=$TIMEOUT
+ echo $(date "+%H:%M:%S") " pl_netinit: device present $ETH_DEVICE, proceeding (${COUNTER}s/${ALLOW}s)"
break
fi
- echo $(date "+%H:%M:%S") " pl_netinit: waiting for device $ETH_DEVICE, ${TIMEOUT}s remaining "
- TIMEOUT=$(($TIMEOUT-1))
- [ $TIMEOUT -le 0 ] && net_init_failed
+ echo $(date "+%H:%M:%S") " pl_netinit: waiting for device $ETH_DEVICE - ${COUNTER}s/${ALLOW}s"
+ set -x  # trace the diagnostic commands below, until the matching "set +x"
+ /sbin/ifconfig  # list the interfaces currently reported by ifconfig
+ journalctl -b | egrep 'eth|bnx|udev'  # current-boot journal lines matching eth, bnx or udev
+ systemctl list-unit-files | grep -i network  # unit files whose listing mentions "network"
+ set +x
+ COUNTER=$(($COUNTER+1))
+ [ $COUNTER -ge $ALLOW ] && net_init_failed  # ALLOW iterations used up: call net_init_failed
sleep 1
done
echo $(date "+%H:%M:%S") " pl_netinit: this CD does not have hardware support for your"
echo $(date "+%H:%M:%S") " pl_netinit: network adapter. please send the following lines"
echo $(date "+%H:%M:%S") " pl_netinit: to your PlanetLab support for further assistance"
- echo ========== lspci beg
- /sbin/lspci -n | /bin/grep "Class 0200"
- echo ========== lspci end
- echo ========== ifconfig beg
- /sbin/ifconfig
- echo ========== ifconfig beg
-
net_init_failed
}
/sbin/ifconfig $ETH_DEVICE > $IFCONFIG_OUTPUT
echo $(date "+%H:%M:%S") " pl_netinit: network online"
+
+function pl_netinit_forensics () {
+ # Print diagnostics between BEG/END markers: root's authorized_keys, selected sshd_config lines, and resolv.conf.
+ echo "-------------------- BEG post pl_netinit forensics"
+ # root's ssh authorized_keys: the path is echoed first, then the file is dumped or reported missing
+ file=/root/.ssh/authorized_keys
+ echo "$file"
+ if ! [ -f "$file" ]; then
+ echo "!!!!!!!!!! missing $file"
+ else
+ echo "---------- $file"
+ cat $file
+ fi
+ # sshd_config: show only lines that do not start with '#' and that match Pass, Auth, PAM or Root
+ file="/etc/ssh/sshd_config"
+ if ! [ -f "$file" ]; then
+ echo "!!!!!!!!!! missing $file"
+ else
+ echo '---------- egrep Pass|Auth|PAM|Root' $file
+ grep -v '^#' $file | egrep 'Pass|Auth|PAM|Root'
+ fi
+ # resolv.conf: dump it in full, or report it missing
+ # on f22 we see an empty resolv.conf...
+ file=/etc/resolv.conf
+ if ! [ -f "$file" ]; then
+ echo "!!!!!!!!!! missing $file"
+ else
+ echo "---------- $file"
+ cat $file
+ fi
+
+ echo "-------------------- END post pl_netinit forensics"
+}
+
+pl_netinit_forensics