X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=initscripts%2Fpl_netinit;h=2363ceade051160ab9b5292f051b2f8e7bfe1be5;hb=5c7527aa84590ea6b852d4cb1e3be0899351387c;hp=4e6b770cd4c1c15d20919be7a2dc5546cd8c2450;hpb=fd85a905ec4b0e4810adf053d9bdcd6c22d23f98;p=bootcd.git diff --git a/initscripts/pl_netinit b/initscripts/pl_netinit index 4e6b770..2363cea 100755 --- a/initscripts/pl_netinit +++ b/initscripts/pl_netinit @@ -1,4 +1,7 @@ -#!/bin/sh +#!/bin/bash +#-*-shell-script-*- + +set -x # the name of the floppy based network configuration # files (checked first). the name planet.cnf is kept @@ -55,38 +58,21 @@ IFCONFIG_OUTPUT=/tmp/ifconfig DEFAULT_NET_CONF=0 -net_init_failed() -{ - echo - echo $(date "+%H:%M:%S") " pl_netinit: network initialization failed," - echo $(date "+%H:%M:%S") " pl_netinit: shutting down machine in two hours" - /bin/sleep 2h - /sbin/shutdown -h now +function net-init-failed() { + verbose-message "pl_netinit: network initialization failed with interface $ETH_DEVICE" + verbose-forensics "failed to configure $ETH_DEVICE" + hang-and-shutdown "net-init-failed" exit 1 } -# Function for checking the IP address to see if its sensible. -check_ip() -{ - case "$*" in - "" | *[!0-9.]* | *[!0-9]) return 1 ;; - esac - local IFS=. - set -- $* - [ $# -eq 4 ] && - [ ${1:-666} -le 255 ] && [ ${2:-666} -le 255 ] && - [ ${3:-666} -le 255 ] && [ ${4:-666} -le 255 ] -} - # find and parse a node network configuration file. return 0 if not found, # return 1 if found and parsed. if this is the case, DEFAULT_NET_CONF will # be set to 1. For any found configuration file, $USED_NET_CONF will # contain the validated contents -find_node_config() -{ +function find-node-config() { /bin/rm -f $TMP_OLD_FLOPPY_CONF_FILE 2>&1 > /dev/null - echo $(date "+%H:%M:%S") " pl_netinit: looking for node configuration file on floppy" + verbose-message "pl_netinit: looking for node configuration file on floppy" /bin/mount -o ro -t $NODE_CONF_DEVICE_FS_TYPES /dev/fd0 \ $CONF_DEVICE_MOUNT_POINT 2>&1 > /dev/null @@ -95,7 +81,7 @@ find_node_config() # 1. check for new named file first on the floppy disk if [ -r "$CONF_DEVICE_MOUNT_POINT/$NEW_NODE_CONF_NAME" ]; then conf_file="$CONF_DEVICE_MOUNT_POINT/$NEW_NODE_CONF_NAME" - echo $(date "+%H:%M:%S") " pl_netinit: found node configuration file $conf_file" + verbose-message "pl_netinit: found node configuration file $conf_file" /etc/init.d/pl_validateconf < $conf_file > $USED_NET_CONF /bin/umount $CONF_DEVICE_MOUNT_POINT return 1 @@ -107,14 +93,14 @@ find_node_config() elif [ -r "$CONF_DEVICE_MOUNT_POINT/$OLD_NODE_CONF_NAME" ]; then conf_file="$CONF_DEVICE_MOUNT_POINT/$OLD_NODE_CONF_NAME" /bin/cp -f $conf_file $TMP_OLD_FLOPPY_CONF_FILE - echo $(date "+%H:%M:%S") " pl_netinit: found old named configuration file, checking later." + verbose-message "pl_netinit: found old named configuration file, checking later." else - echo $(date "+%H:%M:%S") " pl_netinit: floppy mounted, but no configuration file." + verbose-message "pl_netinit: floppy mounted, but no configuration file." fi /bin/umount $CONF_DEVICE_MOUNT_POINT else - echo $(date "+%H:%M:%S") " pl_netinit: no floppy could be mounted, continuing search." + verbose-message "pl_netinit: no floppy could be mounted, continuing search." fi # 2. check for a new named file on removable flash devices (those @@ -122,13 +108,13 @@ find_node_config() # to prevent checking normal scsi disks, also make sure # /sys/block//removable is set to 1 - echo $(date "+%H:%M:%S") " pl_netinit: looking for node configuration file on flash based devices" + verbose-message "pl_netinit: looking for node configuration file on flash based devices" # make the sd* hd* expansion fail to an empty string if there are no sd # devices shopt -s nullglob - for device in /sys/block/[hs]d*; do + for device in /sys/block/[hsv]d*; do removable=$(cat $device/removable) if [[ $removable -ne 1 ]]; then continue @@ -138,20 +124,20 @@ find_node_config() for partition in $partitions ; do check_dev=/dev/$partition - echo $(date "+%H:%M:%S") " pl_netinit: looking for node configuration file on device $check_dev" + verbose-message "pl_netinit: looking for node configuration file on device $check_dev" /bin/mount -o ro -t $NODE_CONF_DEVICE_FS_TYPES $check_dev \ $CONF_DEVICE_MOUNT_POINT 2>&1 > /dev/null if [[ $? -eq 0 ]]; then if [ -r "$CONF_DEVICE_MOUNT_POINT/$NEW_NODE_CONF_NAME" ]; then conf_file="$CONF_DEVICE_MOUNT_POINT/$NEW_NODE_CONF_NAME" - echo $(date "+%H:%M:%S") " pl_netinit: found node configuration file $conf_file" + verbose-message "pl_netinit: found node configuration file $conf_file" /etc/init.d/pl_validateconf < $conf_file > $USED_NET_CONF - echo $(date "+%H:%M:%S") " pl_netinit: found configuration" + verbose-message "pl_netinit: found configuration" /bin/umount $CONF_DEVICE_MOUNT_POINT return 1 fi - echo $(date "+%H:%M:%S") " pl_netinit: not found" + verbose-message "pl_netinit: ERROR - not found" /bin/umount $CONF_DEVICE_MOUNT_POINT fi @@ -165,27 +151,27 @@ find_node_config() # the file $TMP_OLD_FLOPPY_CONF_FILE will be readable. if [ -r "$TMP_OLD_FLOPPY_CONF_FILE" ]; then conf_file=$TMP_OLD_FLOPPY_CONF_FILE - echo $(date "+%H:%M:%S") " pl_netinit: found node configuration file $conf_file" + verbose-message "pl_netinit: found node configuration file $conf_file" /etc/init.d/pl_validateconf < $conf_file > $USED_NET_CONF return 1 fi # 4. check for plnode.txt on the cd at /usr/boot - echo $(date "+%H:%M:%S") " pl_netinit: looking for network configuration on cd in /usr/boot" + verbose-message "pl_netinit: looking for network configuration on cd in /usr/boot" if [ -r "$CD_NET_CONF_BOOT" ]; then - echo $(date "+%H:%M:%S") " pl_netinit: found cd configuration file $CD_NET_BOOT_CONF" + verbose-message "pl_netinit: found cd configuration file $CD_NET_BOOT_CONF" /etc/init.d/pl_validateconf < $CD_NET_CONF_BOOT > $USED_NET_CONF return 1 fi # 5. check for plnode.txt on the cd at /usr - echo $(date "+%H:%M:%S") " pl_netinit: looking for network configuration on cd in /usr" + verbose-message "pl_netinit: looking for network configuration on cd in /usr" if [ -r "$CD_NET_CONF_ROOT" ]; then - echo $(date "+%H:%M:%S") " pl_netinit: found cd configuration file $CD_NET_CONF_ROOT" + verbose-message "pl_netinit: found cd configuration file $CD_NET_CONF_ROOT" /etc/init.d/pl_validateconf < $CD_NET_CONF_ROOT > $USED_NET_CONF return 1 fi @@ -195,9 +181,9 @@ find_node_config() # builtin default. this can't be used to install a machine, but # will at least let it download and run the boot manager, which # can inform the users appropriately. - echo $(date "+%H:%M:%S") " pl_netinit: using default network configuration" + verbose-message "pl_netinit: using default network configuration" if [ -r "$FALLBACK_NET_CONF" ]; then - echo $(date "+%H:%M:%S") " pl_netinit: found cd default configuration file $FALLBACK_NET_CONF" + verbose-message "pl_netinit: found cd default configuration file $FALLBACK_NET_CONF" /etc/init.d/pl_validateconf < $FALLBACK_NET_CONF > $USED_NET_CONF DEFAULT_NET_CONF=1 return 1 @@ -207,16 +193,16 @@ find_node_config() } -echo $(date "+%H:%M:%S") " pl_netinit: bringing loopback network device up" +verbose-message "pl_netinit: bringing loopback network device up" /sbin/ifconfig lo 127.0.0.1 up -find_node_config +find-node-config if [ $? -eq 0 ]; then # no network configuration file found. this should not happen as the # default cd image has a backup one. halt. - echo $(date "+%H:%M:%S") " pl_netinit: unable to find even a default network configuration" - echo $(date "+%H:%M:%S") " pl_netinit: file, this cd may be corrupt." - net_init_failed + verbose-message "pl_netinit: ERROR - unable to find even a default network configuration" + verbose-message "pl_netinit: file, this cd may be corrupt." + net-init-failed fi # load the configuration file. if it was a default one (not user specified), @@ -225,7 +211,7 @@ fi # they will fail (as they should) - but the network will be up if dhcp is # available -echo $(date "+%H:%M:%S") " pl_netinit: loading network configuration" +verbose-message "pl_netinit: loading network configuration" . $USED_NET_CONF if [[ $DEFAULT_NET_CONF -eq 1 ]]; then @@ -252,14 +238,14 @@ ETH_DEVICE= if [[ -n "$NET_DEVICE" ]]; then # the user specified a mac address we should use. find the network # device for it. - NET_DEVICE=$(tr A-Z a-z <<<$NET_DEVICE) + NET_DEVICE=$(tr A-Z a-z <<< $NET_DEVICE) pushd /sys/class/net for device in *; do dev_address=$(cat $device/address | tr A-Z a-z) if [ "$device" == "$NET_DEVICE" -o "$dev_address" == "$NET_DEVICE" ]; then ETH_DEVICE=$device - echo $(date "+%H:%M:%S") " pl_netinit: found device $ETH_DEVICE with mac address $dev_address" + verbose-message "pl_netinit: found device $ETH_DEVICE with mac address $dev_address" break fi done @@ -279,33 +265,86 @@ fi # still nothing? fail the boot. if [[ -z "$ETH_DEVICE" ]]; then - echo $(date "+%H:%M:%S") " pl_netinit: unable to find a usable device, check to make sure" - echo $(date "+%H:%M:%S") " pl_netinit: the NET_DEVICE field in the configuration file" - echo $(date "+%H:%M:%S") " pl_netinit: corresponds with a network adapter on this system" - net_init_failed + verbose-message "pl_netinit: unable to find a usable device, check to make sure" + verbose-message "pl_netinit: the NET_DEVICE field in the configuration file" + verbose-message "pl_netinit: corresponds with a network adapter on this system" + net-init-failed fi +# within a systemd-driven startup, we often see this stage +# triggered before the network interface is actually exposed +# by udev/kernel +# although of course we have network-online.target +# as a requirement; go figure what systemd actually does.. + +# in any case, let us try to work around that by allowing some delay +# here + +# tmp: Thierry June 2015 +# on fedora 21 nodes we see this running in a context where eth0 is not known to the system +# could be related to a dependency that we poorly describe to systemd +# I am increasing this timeout to 2 minutes in order to check that conjecture +ALLOW=60 +COUNTER=0 +while true; do + if /sbin/ifconfig $ETH_DEVICE >& /dev/null; then + verbose-message "pl_netinit: device present $ETH_DEVICE, proceeding (${COUNTER}s/${ALLOW}s)" + break + fi + verbose-message "pl_netinit: waiting for device $ETH_DEVICE - ${COUNTER}s/${ALLOW}s" + set -x + /sbin/ifconfig + journalctl -b | egrep 'eth|bnx|udev' + systemctl list-unit-files | grep -i network + set +x + COUNTER=$(($COUNTER+1)) + [ $COUNTER -ge $ALLOW ] && net-init-failed + sleep 1 +done + # actually check to make sure ifconfig succeeds -/sbin/ifconfig $ETH_DEVICE up 2>&1 > /dev/null -if [[ $? -ne 0 ]]; then - echo $(date "+%H:%M:%S") " pl_netinit: device $ETH_DEVICE does not exist, most likely" - echo $(date "+%H:%M:%S") " pl_netinit: this cd does not have hardware support for your" - echo $(date "+%H:%M:%S") " pl_netinit: network adapter. please send the following lines" - echo $(date "+%H:%M:%S") " pl_netinit: to PlanetLab Support: support@planet-lab.org" - echo $(date "+%H:%M:%S") " pl_netinit: for further assistance" - echo - /sbin/lspci -n | /bin/grep "Class 0200" - echo - - net_init_failed -fi -echo $(date "+%H:%M:%S") " pl_netinit: attempting to start networking" +/sbin/ifconfig $ETH_DEVICE up 2>&1 > /dev/null || { + verbose-message "pl_netinit: device $ETH_DEVICE does not exist, most likely" + verbose-message "pl_netinit: this CD does not have hardware support for your" + verbose-message "pl_netinit: network adapter. please send the following lines" + verbose-message "pl_netinit: to your PlanetLab support for further assistance" + net-init-failed +} + +verbose-message "pl_netinit: attempting to start networking" /sbin/service network start # for backwards compatibility /sbin/ifconfig $ETH_DEVICE > $IFCONFIG_OUTPUT -echo $(date "+%H:%M:%S") " pl_netinit: network online" +verbose-message "pl_netinit: network online" + +# patch for f22 - if /etc/resolv.conf is empty in static mode +function pl-netinit-patch-resolv-conf () { + file=/etc/resolv.conf + needed="" + # missing file : patch needed + if ! [ -f $file ]; then + needed=true + # empty file : patch needed + elif cmp $file /dev/null; then + needed=true + fi + if [ -n "$needed" ]; then + verbose-message "pl_netinit: patching $file" + source /etc/sysconfig/network-scripts/ifcfg-${ETH_DEVICE} + # delete because it's a symlink to /run/systemd/resolve/resolv.conf + # which looks really weird (ls -lL /etc/resolv.conf does not show anything) + rm $file + ( [ -n "$DNS1" ] && echo nameserver $DNS1; \ + [ -n "$DNS2" ] && echo nameserver $DNS2 ) > $file + else + echo pl_netinit has no need to patch $file + fi +} + +pl-netinit-patch-resolv-conf +verbose-forensics "pl_netinit epilogue"