X-Git-Url: http://git.onelab.eu/?p=mom.git;a=blobdiff_plain;f=pl_mop.sh;h=49222770d418f13607c09126939cd276521c18cc;hp=97dad39f92b016e703789ebe004f9da327b52d0e;hb=a7ab53cbaf3037b67fbda98fed3887c905d93e96;hpb=efa183b54f02fdd5666312792fc73552b6f06ec6 diff --git a/pl_mop.sh b/pl_mop.sh index 97dad39..4922277 100755 --- a/pl_mop.sh +++ b/pl_mop.sh @@ -5,8 +5,6 @@ # Mark Huang # Copyright (C) 2005 The Trustees of Princeton University # -# $Id$ -# PATH=/sbin:/usr/sbin:$PATH @@ -53,7 +51,8 @@ fix_etc_shadow() { # keep essential services running restart_services() { - for service in sshd pl_sshd swapmon nm proper ; do + for service in sshd pl_sshd swapmon nm fprobe-ulog codemux; do + chkconfig --list $service | grep -q 3:on || continue echo "* Checking $service" status=$(service $service status) if [ $? -ne 0 ] || echo $status 2>&1 | grep -q stopped ; then @@ -63,59 +62,6 @@ restart_services() { done } -# keep netflow running -restart_netflow() { - echo "* Checking netflow" - echo "sudo /sbin/service netflow restart" | su - pl_netflow - if [ $? -ne 0 ] ; then - echo "* Restarting netflow" - service netflow-init start - vserver pl_netflow start - echo "sudo /sbin/service netflow restart" | su - pl_netflow - fi -} - -# GPG keys are installed in /etc/pki/rpm-gpg by both the Boot Manager -# during initial installation, and by PlanetLabConf during daily -# updates. NodeUpdate imports the keys into the RPM database before -# running yum daily. vserver-reference copies and imports the keys -# into the reference images and system slices daily. The only parts of -# this process that are actually necessary, are the Boot Manager and -# vserver-reference. However, we do not want to force a re-install of -# all nodes, and we do not want to force an update of -# vserver-reference, so in the meantime, PlanetLabConf and NodeUpdate -# take care of getting the keys installed and imported in /, and this -# script takes care of getting them installed in the reference images -# and system slices, until we can get a new vserver-reference image -# pushed out. -update_vserver_reference() { - echo "* Updating VServer reference" - - shopt -s nullglob - - VROOTS="/vservers/vserver-reference /vservers/.vcache/* /vservers/${PLC_SLICE_PREFIX}_*" - - # Copy configuration files from host to slices - for file in \ - /etc/hosts /etc/resolv.conf /etc/yum.conf /etc/planetlab/node_id \ - /etc/planetlab/plc_config* /etc/planetlab/php/* \ - /etc/pki/rpm-gpg/* ; do - if [ -r $file ] ; then - for vroot in $VROOTS ; do - install -D -m 644 $file $vroot/$file - done - fi - done - - # (Re)install GPG signing keys - if [ -d /etc/pki/rpm-gpg ] ; then - for vroot in $VROOTS ; do - chroot $vroot rpm --allmatches -e gpg-pubkey || : - chroot $vroot rpm --import /etc/pki/rpm-gpg/* || : - done - fi -} - # kill all the processes running in slice contexts vkillall() { vps -A | awk '(int($2) > 1) { system("vkill -c " $2 " -s 9 " $1); }' @@ -239,9 +185,49 @@ kill_multi_nm(){ /etc/init.d/nm start fi } + +fix_rpm() { + echo "* Checking for stuck rpm processes" + + rpm_count=`pgrep -f "rpm" | wc -l` + + if [[ $rpm_count -ge 6 ]]; then + echo "* $rpm_count rpm processes found" + + # kill rpm processes, attempt up to 10 times and then give up + try_count=0 + rpm_count=`pgrep "rpm|yum" | wc -l` + while [[ $rpm_count -gt 0 ]]; do + echo "* killing rpm/yum processes" + killall -9 rpm rpmd rpmq rpmk yum + sleep 1 + rpm_count=`pgrep "rpm|yum" | wc -l` + try_count=`expr $try_count + 1` + if [[ $try_count -ge 10 ]]; then + echo "* failed to kill rpm processes" + return + fi + done + + # remove lock files + echo "* deleting rpm lock files" + rm -f /var/lib/rpm/__* + + # rebuild rpm database + echo "* rebuilding rpm database" + rpm --rebuilddb + + echo "* rpm repair sequence complete" + + fi +} + # XXX kill zombie slices -# XXX reboot if boot state changes +run restart_services + +run fix_rpm + run kill_nonroot_nm run kill_nm_inslice @@ -252,11 +238,4 @@ run fix_vservers run fix_etc_shadow -run restart_services - -run restart_netflow - run kill_duplicate_ssh - -run update_vserver_reference -