# keep essential services running
restart_services() {
- for service in sshd pl_sshd swapmon nm proper ; do
+ for service in sshd pl_sshd swapmon nm fprobe-ulog codemux; do
+ chkconfig --list $service | grep -q 3:on || continue
echo "* Checking $service"
status=$(service $service status)
if [ $? -ne 0 ] || echo $status 2>&1 | grep -q stopped ; then
done
}
-# keep netflow running
-restart_netflow() {
- echo "* Checking netflow"
- echo "sudo /sbin/service netflow restart" | su - pl_netflow
- if [ $? -ne 0 ] ; then
- echo "* Restarting netflow"
- service netflow-init start
- vserver pl_netflow start
- echo "sudo /sbin/service netflow restart" | su - pl_netflow
- fi
-}
-
-# GPG keys are installed in /etc/pki/rpm-gpg by both the Boot Manager
-# during initial installation, and by PlanetLabConf during daily
-# updates. NodeUpdate imports the keys into the RPM database before
-# running yum daily. vserver-reference copies and imports the keys
-# into the reference images and system slices daily. The only parts of
-# this process that are actually necessary, are the Boot Manager and
-# vserver-reference. However, we do not want to force a re-install of
-# all nodes, and we do not want to force an update of
-# vserver-reference, so in the meantime, PlanetLabConf and NodeUpdate
-# take care of getting the keys installed and imported in /, and this
-# script takes care of getting them installed in the reference images
-# and system slices, until we can get a new vserver-reference image
-# pushed out.
-update_vserver_reference() {
- echo "* Updating VServer reference"
-
- shopt -s nullglob
-
- VROOTS="/vservers/vserver-reference /vservers/.vcache/* /vservers/${PLC_SLICE_PREFIX}_*"
-
- # Copy configuration files from host to slices
- for file in \
- /etc/hosts /etc/resolv.conf /etc/yum.conf /etc/planetlab/node_id \
- /etc/planetlab/plc_config* /etc/planetlab/php/* \
- /etc/pki/rpm-gpg/* ; do
- if [ -r $file ] ; then
- for vroot in $VROOTS ; do
- install -D -m 644 $file $vroot/$file
- done
- fi
- done
-
- # (Re)install GPG signing keys
- if [ -d /etc/pki/rpm-gpg ] ; then
- for vroot in $VROOTS ; do
- chroot $vroot rpm --allmatches -e gpg-pubkey || :
- chroot $vroot rpm --import /etc/pki/rpm-gpg/* || :
- done
- fi
-}
-
# kill all the processes running in slice contexts
vkillall() {
vps -A | awk '(int($2) > 1) { system("vkill -c " $2 " -s 9 " $1); }'
line=$(vps aux | grep $pid)
echo NM found in slice. Killing PID $pid
echo $line
- kill -9 $pid
+ vkill -9 $pid
done
}
/etc/init.d/nm start
fi
}
+
+# Recover from stuck rpm/yum processes.
+#
+# Trigger: two or more processes whose full command line matches
+# "yum clean all" are running at the same time.
+#
+# Action: force-kill every rpm/rpmd/rpmq/rpmk/yum process (up to 10
+# attempts, one second apart), delete the /var/lib/rpm/__* files and
+# rebuild the rpm database.  If the processes are still alive after the
+# 10th attempt, the function logs the failure and returns without
+# touching the database.
+fix_rpm() {
+    local yum_count rpm_count try_count
+
+    echo "* Checking for stuck rpm processes"
+
+    yum_count=$(pgrep -f "yum clean all" | wc -l)
+
+    if [[ $yum_count -ge 2 ]]; then
+        echo "* $yum_count yum processes found"
+
+        # kill rpm processes, attempt up to 10 times and then give up.
+        # pgrep -x matches whole process names only, i.e. exactly the
+        # set that killall targets below; an unanchored "rpm|yum" would
+        # also count unrelated processes that killall never kills.
+        try_count=0
+        rpm_count=$(pgrep -x "rpm|rpmd|rpmq|rpmk|yum" | wc -l)
+        while [[ $rpm_count -gt 0 ]]; do
+            # Give up only while processes are still alive: a kill that
+            # succeeds on the final attempt must fall through to the
+            # repair steps instead of being reported as a failure.
+            if [[ $try_count -ge 10 ]]; then
+                echo "* failed to kill rpm processes"
+                return
+            fi
+            echo "* killing rpm/yum processes"
+            killall -9 rpm rpmd rpmq rpmk yum
+            sleep 1
+            rpm_count=$(pgrep -x "rpm|rpmd|rpmq|rpmk|yum" | wc -l)
+            try_count=$((try_count + 1))
+        done
+
+        # remove lock files
+        echo "* deleting rpm lock files"
+        rm -f /var/lib/rpm/__*
+
+        # rebuild rpm database
+        echo "* rebuilding rpm database"
+        rpm --rebuilddb
+
+        echo "* rpm repair sequence complete"
+
+    fi
+}
+
# XXX kill zombie slices
-# XXX reboot if boot state changes
+run restart_services
+
+run fix_rpm
+
run kill_nonroot_nm
run kill_nm_inslice
run fix_etc_shadow
-run restart_services
-
-run restart_netflow
-
run kill_duplicate_ssh
-
-run update_vserver_reference
-