X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=monitor%2Fbootman.py;h=975421871593f2677ff2317fe24916c268eda8a8;hb=32e47491837a321e684ea167ab6aa430145562f1;hp=eac27613ec2408f1c7db6fb18292090cf3868669;hpb=e637272100e8e03884188cb2118b21158e739bb0;p=monitor.git diff --git a/monitor/bootman.py b/monitor/bootman.py index eac2761..9754218 100755 --- a/monitor/bootman.py +++ b/monitor/bootman.py @@ -129,6 +129,21 @@ class NodeConnection: print key, " == ", bm.VARS[key] else: print " Unable to read Node Configuration" + + def fprobe_repair_node(self): + # When fprobe data gets too much, it fills the root partition and + # fails to boot + c = self.c + self.c.modules.sys.path.append("/tmp/source/") + + # NOTE: assume that the root fs is already mounted... + if self.c.modules.os.path.exists('/tmp/mnt/sysimg/var/local/fprobe'): + print "CLEARING FPROBE DATA on %s" % self.node + self.c.modules.os.chdir('/tmp/mnt/sysimg/var/local/fprobe') + cmd = """ ls -lrt . | awk '{if (i return code = 0x\d+'), ('ioerror' , 'end_request: I/O error, dev sd\w+, sector \d+'), ('ccisserror' , 'cciss: cmd \w+ has CHECK CONDITION'), @@ -681,10 +697,13 @@ def restore_basic(sitehist, hostname, config=None, forced_action=None): args['saveact'] = True args['ccemail'] = True - sitehist.sendMessage('unknownsequence_notice', **args) + if 'nospace' in s: + # NOTE: sequence is unknown and contains nospace, so try the + # fprobe repair trick first. + conn.fprobe_repair_node() + sitehist.sendMessage('unknownsequence_notice', **args) conn.restart_bootmanager('boot') - bootman_action = "restart_bootmanager" # NOTE: Do not set the pflags value for this sequence if it's unknown.