import subprocess
from sets import Set
-from monitor.getsshkeys import SSHKnownHosts
+from monitor.util.sshknownhosts import SSHKnownHosts
from monitor.Rpyc import SocketConnection, Async
from monitor.Rpyc.Utils import *
print key, " == ", bm.VARS[key]
else:
print " Unable to read Node Configuration"
+
+ def fprobe_repair_node(self):
+ # Free space on the node by removing part of the accumulated fprobe data.
+ #
+ # When fprobe data grows too large, it fills the root partition and
+ # the node fails to boot.
+ #
+ # All filesystem calls go through self.c.modules (presumably the remote
+ # Rpyc connection to the node -- TODO confirm), not the local os module.
+ # Takes no arguments and returns nothing; progress is reported with print.
+ # NOTE(review): 'c' is assigned but never used in this method.
+ c = self.c
+ self.c.modules.sys.path.append("/tmp/source/")
+
+ # NOTE: assume that the root fs is already mounted...
+ if self.c.modules.os.path.exists('/tmp/mnt/sysimg/var/local/fprobe'):
+ print "CLEARING FPROBE DATA on %s" % self.node
+ self.c.modules.os.chdir('/tmp/mnt/sysimg/var/local/fprobe')
+ # List the directory with 'ls -lrt', have awk print an "rm <field 9>"
+ # command for selected entries, and pipe those commands to sh.
+ # NOTE(review): inside the per-record rule NR is the current line
+ # number, not the total, so 'i<NR/2' selects an interleaved ~half of
+ # the entries rather than strictly the oldest half -- confirm intent.
+ # NOTE(review): $9 is split on whitespace, so names containing spaces
+ # would not be removed correctly -- confirm fprobe never creates such.
+ cmd = """ ls -lrt . | awk '{if (i<NR/2 && $9) {print "rm "$9;i=i+1;}}' | sh """
+ self.c.modules.os.system(cmd)
+ else:
+ # The fprobe directory was not found at the expected mount point.
+ print "COULD NOT CLEAR FPROBE DATA on %s" % self.node
def fsck_repair_node(self):
c = self.c
# COPY Rpyc files to host
#cmd = "rsync -vvv -az -e ssh %(monitordir)s/Rpyc/ %(user)s@%(hostname)s:Rpyc 2> /dev/null" % args
- cmd = """rsync -vvv -az -e "ssh -o BatchMode=yes" %(monitordir)s/Rpyc/ %(user)s@%(hostname)s:Rpyc""" % args
+ cmd = """rsync -vvv -az -e "ssh -o BatchMode=yes" %(monitordir)s/monitor/Rpyc/ %(user)s@%(hostname)s:Rpyc""" % args
if self.verbose: print cmd
print cmd
# TODO: Add timeout
def getDiskSteps(self):
steps = [
+ ('scsierror2' , 'sd \d:\d:\d:\d: ioctl_internal_command return code = \d+'),
('scsierror' , 'SCSI error : <\d+ \d+ \d+ \d+> return code = 0x\d+'),
('ioerror' , 'end_request: I/O error, dev sd\w+, sector \d+'),
('ccisserror' , 'cciss: cmd \w+ has CHECK CONDITION'),
args['saveact'] = True
args['ccemail'] = True
- sitehist.sendMessage('unknownsequence_notice', **args)
+ if 'nospace' in s:
+ # NOTE: sequence is unknown and contains nospace, so try the
+ # fprobe repair trick first.
+ conn.fprobe_repair_node()
+ sitehist.sendMessage('unknownsequence_notice', **args)
conn.restart_bootmanager('boot')
-
bootman_action = "restart_bootmanager"
# NOTE: Do not set the pflags value for this sequence if it's unknown.
return bootman_action
-# MAIN -------------------------------------------------------------------
-
-def main():
- from monitor import parser as parsermodule
- parser = parsermodule.getParser()
-
- parser.set_defaults(child=False, collect=False, nosetup=False, verbose=False,
- force=None, quiet=False)
- parser.add_option("", "--child", dest="child", action="store_true",
- help="This is the child mode of this process.")
- parser.add_option("", "--force", dest="force", metavar="boot_state",
- help="Force a boot state passed to BootManager.py.")
- parser.add_option("", "--quiet", dest="quiet", action="store_true",
- help="Extra quiet output messages.")
- parser.add_option("", "--verbose", dest="verbose", action="store_true",
- help="Extra debug output messages.")
- parser.add_option("", "--nonet", dest="nonet", action="store_true",
- help="Do not setup the network, use existing log files to re-run a test pass.")
- parser.add_option("", "--collect", dest="collect", action="store_true",
- help="No action, just collect dmesg, and bm.log")
- parser.add_option("", "--nosetup", dest="nosetup", action="store_true",
- help="Do not perform the orginary setup phase.")
-
- parser = parsermodule.getParser(['nodesets', 'defaults'], parser)
- config = parsermodule.parse_args(parser)
-
- if config.nodelist:
- nodes = config.getListFromFile(config.nodelist)
- elif config.node:
- nodes = [ config.node ]
- else:
- parser.print_help()
- sys.exit(1)
-
- for node in nodes:
- # get sitehist
- lb = plccache.plcdb_hn2lb[node]
- sitehist = SiteInterface.get_or_make(loginbase=lb)
- #reboot(node, config)
- restore(sitehist, node, config=None, forced_action=None)
-
if __name__ == "__main__":
- main()
+ # This module is no longer runnable directly; only print a pointer to the
+ # wrapper script under commands/.
+ # NOTE(review): no 'import os' is visible in this fragment -- confirm it is
+ # imported at file level. splitext() keeps any directory part of __file__,
+ # so the printed path may include it -- confirm basename is not needed.
+ print "ERROR: Can not execute module as a command! Please use commands/%s.py" % os.path.splitext(__file__)[0]