%define name bootmanager
%define version 5.0
-%define taglevel 6
+%define taglevel 12
%define release %{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}}
/etc/plc.d/bootmanager
%changelog
+* Thu Aug 26 2010 S.Çağlar Onur <caglar@cs.princeton.edu> - bootmanager-5.0-12
+- Revert "replace deprecated popen2 with subprocess"
+
+* Wed Aug 11 2010 S.Çağlar Onur <caglar@cs.princeton.edu> - bootmanager-5.0-11
+- replace deprecated popen2 with subprocess and handle fsck return codes in a different code path
+
+* Fri Jul 30 2010 S.Çağlar Onur <caglar@cs.princeton.edu> - bootmanager-5.0-10
+- Fix typo
+
+* Fri Jul 30 2010 Baris Metin <Talip-Baris.Metin@sophia.inria.fr> - bootmanager-5.0-9
+- fix typo
+
+* Wed Jul 28 2010 S.Çağlar Onur <caglar@cs.princeton.edu> - bootmanager-5.0-8
+- disable time/count based filesystem checks
+
+* Tue Jul 27 2010 S.Çağlar Onur <caglar@cs.princeton.edu> - bootmanager-5.0-7
+- Fix new disk additions to LVM array
+
* Wed Jul 07 2010 Thierry Parmentelat <thierry.parmentelat@sophia.inria.fr> - BootManager-5.0-6
- bugfix for centos5/python2.4 missing hashlib
from Exceptions import *
import notify_messages
import BootServerRequest
+import utils
# all output is written to this file
BM_NODE_LOG= "/tmp/bm.log"
self.LogEntry( str, 0, 1 )
# bm log uploading is available back again, as of nodeconfig-5.0-2
- def Upload( self ):
+ def Upload( self, extra_file=None ):
"""
upload the contents of the log to the server
"""
FormData = [('log',(pycurl.FORM_FILE, self.OutputFilePath)),
("hostname",hostname),
("type","bm.log")])
+ if extra_file is not None:
+ # NOTE: for code reuse, invoke the bash function 'upload_logs';
+ # by adding --login, bash reads .bash_profile before execution.
+ # Also, never fail, since this is an optional feature.
+ utils.sysexec( """bash --login -c "upload_logs %s || /bin/true" """ % extra_file, self)
##############################
# the data back to PlanetLab central
LOG= log( BM_NODE_LOG )
+ # NOTE: assume CWD is BM's source directory, but never fail
+ utils.sysexec("./setup_bash_history_scripts.sh || /bin/true", LOG)
+
LOG.LogEntry( "BootManager started at: %s" % \
time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) )
--- /dev/null
+#!/bin/bash
+
+BASH_PROFILE=/root/.bash_profile
+HISTORY_PROFILE=/etc/profile.d/histlog.sh
+PERIODIC_SCRIPT=/usr/bin/periodic_upload.sh
+
+cat <<\EOF > $BASH_PROFILE
+# NOTE: only upload incremental diffs
+if [ -f /tmp/source/configuration ] ; then
+ source /tmp/source/configuration
+fi
+if [ -z "$MONITOR_SERVER" ] ; then
+ MONITOR_SERVER=monitor.planet-lab.org
+fi
+# upload_log FILE DIR
+# Send to $MONITOR_SERVER the lines of FILE that are not in the snapshot
+# taken by the previous call; DIR is passed as the "dir" form field.
+# Snapshots live in /tmp/<basename>.old and /tmp/<basename>.new, and the
+# increment that gets uploaded is written to /tmp/<basename>.log.
+function upload_log ()
+{
+    # NOTE: this function is sourced by every login shell, so keep its
+    # working variables local instead of leaking them into the session.
+    local file=$1
+    local path=$2
+    local base old new log
+
+    # NOTE: quoted on purpose; unquoted, an empty argument turns the test
+    # into '[ ! -f ]', which is false, and we would carry on without a file.
+    if [ ! -f "$file" ] ; then
+        return
+    fi
+    base=$( basename "$file" )
+    old=/tmp/${base}.old
+    new=/tmp/${base}.new
+    log=/tmp/${base}.log
+
+    # rotate the previous snapshot, or start from an (at least) existing one
+    if [ -f "$new" ] ; then
+        cp "$new" "$old"
+    else
+        touch "$old"
+    fi
+    cp "$file" "$new"
+
+    # keep only the lines that appear in the new snapshot
+    comm -1 -3 "$old" "$new" > "$log"
+    if [ -s "$log" ] ; then
+        if ! curl --max-time 60 --silent --insecure "https://$MONITOR_SERVER/monitor/uploadlogs" --form "dir=$path" --form "log=@$log" ; then
+            # the upload has failed: drop the new snapshot so that the next
+            # call diffs against the old one again and no data is lost
+            rm -f "$new"
+        fi
+    fi
+}
+
+function upload_logs ()
+{
+ upload_log $1 histfail
+}
+
+# NOTE: these aliases aim to upload the history before losing it.
+alias reboot="upload_logs /root/.bash_eternal_history ; /sbin/reboot"
+alias shutdown="upload_logs /root/.bash_eternal_history ; /sbin/shutdown"
+EOF
+
+cat <<\EOF > $HISTORY_PROFILE
+export HISTTIMEFORMAT="%s ";
+# NOTE: HOSTNAME is not reliably set in failboot or safeboot mode
+# NOTE: These steps assign at least a default hostname based on IP
+# NOTE: This hostname is used in the bash-prompt-script commands
+if [[ -z "$HOSTNAME" || "$HOSTNAME" = "(none)" ]] ; then
+ HOSTNAME=`ip addr show dev eth0 | grep inet | tr '/' ' ' | sed -e 's/^ *//g' | cut -f2 -d' '`
+fi
+if [ -f /etc/sysconfig/network-scripts/ifcfg-eth0 ] ; then
+ source /etc/sysconfig/network-scripts/ifcfg-eth0
+ if [ -n "$DHCP_HOSTNAME" ] ; then
+ HOSTNAME=$DHCP_HOSTNAME
+ else
+ if [ -n "$IPADDR" ] ; then
+ HOSTNAME=$IPADDR
+ fi
+ fi
+fi
+hostname $HOSTNAME &> /dev/null
+if [ -n "$BASH_EXECUTION_STRING" ]; then
+ # NOTE: does not work on 2.x versions of bash.
+ # NOTE: log commands executed over ssh
+ echo "$HOSTNAME $$ ssh:$USER xx `date +%s` $BASH_EXECUTION_STRING" >> /root/.bash_eternal_history;
+fi
+if [ -e /etc/sysconfig/bash-prompt-xterm ]; then
+ PROMPT_COMMAND=/etc/sysconfig/bash-prompt-xterm
+fi
+EOF
+chmod 755 $HISTORY_PROFILE
+
+cat <<\EOF > bash-prompt-script
+# NOTE: intended to run after and log every interactive-command
+echo $HOSTNAME $$ $USER "$(history 1)" >> /root/.bash_eternal_history
+EOF
+
+for f in bash-prompt-default bash-prompt-xterm ; do
+ cp bash-prompt-script /etc/sysconfig/$f
+ chmod 755 /etc/sysconfig/$f
+done
+
+# NOTE: allow command run directly over ssh to be logged also.
+echo "source /etc/profile ; source $BASH_PROFILE" > /root/.bashrc
+
+# NOTE 1: crond is not installed on the boot image, so this maintains a
+# persistent process to upload logs on legacy nodes.
+# NOTE 2: A day has 86400 seconds and $RANDOM is between 0 and 32767,
+# so $RANDOM * 3 is between 0 and 98301 seconds (about 27 hours).
+# NOTE 3: The initial delay is randomized in case many nodes reboot at the
+# same time.
+initial_delay=$(( $RANDOM * 3 ))
+
+cat <<EOF > $PERIODIC_SCRIPT
+#!/bin/bash
+if [ -f $BASH_PROFILE ] ; then
+ source $BASH_PROFILE
+else
+ echo "Cannot source upload_logs() definition!"
+ exit 1
+fi
+
+# NOTE: exit if another process is already running.
+if [ \$$ -ne \`pgrep -o periodic\` ] ; then
+ # the current PID differs from the oldest periodic_upload pid
+ exit 0
+fi
+sleep $initial_delay
+while /bin/true ; do
+ upload_logs /root/.bash_eternal_history
+ sleep 86400 # sleep for a day
+done
+EOF
+
+chmod 755 $PERIODIC_SCRIPT
+$PERIODIC_SCRIPT < /dev/null > /tmp/upload.log 2>&1 &
log.write( "Unloading modules and chain booting to new kernel.\n" )
# further use of log after Upload will only output to screen
- log.Upload()
+ log.Upload("/root/.bash_eternal_history")
# regardless of whether kexec works or not, we need to stop trying to
# run anything
cmd = "mount %s %s/vservers" % \
(PARTITIONS["vservers"],SYSIMG_PATH)
utils.sysexec_noerr( cmd, log )
- cmd = "ext2online %s/vservers" % SYSIMG_PATH
+ cmd = "resize2fs %s" % PARTITIONS["vservers"]
resize = utils.sysexec_noerr(cmd,log)
utils.sysexec_noerr( "umount %s/vservers" % SYSIMG_PATH, log )
utils.sysexec_noerr( "umount %s" % SYSIMG_PATH, log )
if NODE_MODEL_OPTIONS & ModelOptions.RAWDISK:
VSERVERS_SIZE= "-1"
- if "VSERVER_SIZE" in vars:
+ if "VSERVERS_SIZE" in vars:
VSERVERS_SIZE= vars["VSERVERS_SIZE"]
if VSERVERS_SIZE == "" or VSERVERS_SIZE == 0:
raise ValueError, "VSERVERS_SIZE"
log.write("formatting %s partition (%s)%s.\n" % (fs,devname,txt))
utils.sysexec( "mkfs.ext2 -q %s -m %d -j %s" % (option,rbp,devname), log )
+ # disable time/count based filesystem checks
+ for filesystem in ("root","vservers"):
+ utils.sysexec_noerr( "tune2fs -c -1 -i 0 %s" % PARTITIONS[filesystem], log)
+
# save the list of block devices in the log
log.write( "Block devices used (in lvm): %s\n" % repr(used_devices))
import string
import socket
import re
+import time
import utils
from Exceptions import *
try:
resolved_node_ip= socket.gethostbyname(hostname)
except socket.gaierror, e:
- hostname_resolve_ok= 0
+ # sleep 5 minutes and try again
+ time.sleep(60*5)
+ try:
+ resolved_node_ip= socket.gethostbyname(hostname)
+ except socket.gaierror, e:
+ hostname_resolve_ok= 0
if INTERFACE_SETTINGS['method'] == "dhcp":
try:
# first run fsck to prevent fs corruption from hanging mount...
log.write( "fsck %s file system\n" % filesystem )
- utils.sysexec("e2fsck -v -p %s" % (PARTITIONS[filesystem]),log)
+ utils.sysexec("e2fsck -v -p %s" % (PARTITIONS[filesystem]),log, True)
except BootManagerException, e:
log.write( "BootManagerException during fsck of %s (%s) filesystem : %s\n" %
(filesystem, PARTITIONS[filesystem], str(e)) )
try:
log.write( "Trying to recover filesystem errors on %s\n" % filesystem )
- utils.sysexec("e2fsck -v -y %s" % (PARTITIONS[filesystem]),log)
+ utils.sysexec("e2fsck -v -y %s" % (PARTITIONS[filesystem]),log, True)
except BootManagerException, e:
log.write( "BootManagerException during trying to recover filesystem errors on %s (%s) filesystem : %s\n" %
(filesystem, PARTITIONS[filesystem], str(e)) )
return -1
+ else:
+ # disable time/count based filesystem checks
+ utils.sysexec_noerr( "tune2fs -c -1 -i 0 %s" % PARTITIONS[filesystem], log)
try:
# then attempt to mount them
# expected /proc/partitions format
import os, sys, shutil
-import popen2
+import subprocess
+import shlex
import socket
import fcntl
import string
-def sysexec( cmd, log= None ):
+def sysexec( cmd, log= None, fsck = False ):
"""
execute a system command, output the results to the logger
if log <> None
+
+    cmd is a single command string that is handed to the shell, so
+    redirections, pipes and '||' behave as they did with popen2.Popen4.
+    stderr is merged into stdout so that error output reaches the log too.
+
+    fsck: when True, the exit status is read with fsck semantics, i.e.
+    1 (file system errors corrected) is accepted in addition to 0.
+
+    returns 1 on success; raises BootManagerException when the command
+    cannot be started, is interrupted by the user, or exits with a
+    status that is not accepted.
"""
if VERBOSE_MODE:
print ("sysexec >>> %s" % cmd)
- prog= popen2.Popen4( cmd, 0 )
- if prog is None:
+
+    try:
+        # NOTE: pass the string itself, not shlex.split(cmd). With
+        # shell=True only the first element of a list is run as the
+        # command, and without a shell '>', '|' and '||' would be
+        # handed to the program as literal arguments.
+        prog = subprocess.Popen(cmd, shell=True,
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.STDOUT)
+    except OSError:
raise BootManagerException, \
- "Unable to create instance of popen2.Popen4 " \
+              "Unable to create instance of subprocess.Popen " \
"for command: %s" % cmd
+    try:
+        # stderr is folded into stdout above, so the second item is None
+        (stdoutdata, stderrdata) = prog.communicate()
+    except KeyboardInterrupt:
+        raise BootManagerException, "Interrupted by user"
if log is not None:
- try:
- for line in prog.fromchild:
- log.write( line )
- except KeyboardInterrupt:
- raise BootManagerException, "Interrupted by user"
-
- returncode= prog.wait()
- if returncode != 0 and returncode != 256:
- raise BootManagerException, "Running %s failed (rc=%d)" % (cmd,returncode)
-
- prog= None
+        # nothing to record when the command was silent
+        if stdoutdata:
+            log.write(stdoutdata)
+
+    # communicate() has already reaped the child; this only reads the status
+    returncode = prog.wait()
+
+    if fsck:
+        # The exit code returned by fsck is the sum of the following conditions:
+        #      0    - No errors
+        #      1    - File system errors corrected
+        #      2    - System should be rebooted
+        #      4    - File system errors left uncorrected
+        #      8    - Operational error
+        #      16   - Usage or syntax error
+        #      32   - Fsck canceled by user request
+        #      128  - Shared library error
+        if returncode != 0 and returncode != 1:
+            raise BootManagerException, "Running %s failed (rc=%d)" % (cmd,returncode)
+    else:
+        if returncode != 0:
+            raise BootManagerException, "Running %s failed (rc=%d)" % (cmd,returncode)
+
+    prog = None
return 1