X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=bootman.py;h=a278afecae6a900590b0ad9706611db9cc1b19b5;hb=944d143a6528c4157b71f51ed480aec806cbaa06;hp=0a75fac83f15f12b3a032bd067ef1605a1e2f7f2;hpb=bccc542ed6a8eec74fd6411976cca424a1158c75;p=monitor.git diff --git a/bootman.py b/bootman.py index 0a75fac..a278afe 100755 --- a/bootman.py +++ b/bootman.py @@ -3,12 +3,11 @@ # Attempt to reboot a node in debug state. import plc -import auth -api = plc.PLC(auth.auth, auth.plc) +api = plc.getAuthAPI() import sys import os -import policy +import const from getsshkeys import SSHKnownHosts @@ -322,7 +321,7 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.newbootcd_one[1] % args, True, db='bootcd_persistmessages') loginbase = plc.siteId(hostname) - m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase]) + m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) print "\tDisabling %s due to out-of-date BOOTCD" % hostname api.UpdateNode(hostname, {'boot_state' : 'disable'}) @@ -401,25 +400,34 @@ def reboot(hostname, config=None, forced_action=None): ('ccisserror' , 'cciss: cmd \w+ has CHECK CONDITION byte \w+ = \w+'), ('buffererror', 'Buffer I/O error on device dm-\d, logical block \d+'), + + ('hdaseekerror', 'hda: dma_intr: status=0x\d+ { DriveReady SeekComplete Error }'), + ('hdacorrecterror', 'hda: dma_intr: error=0x\d+ { UncorrectableError }, LBAsect=\d+, sector=\d+'), + ('atareadyerror' , 'ata\d+: status=0x\d+ { DriveReady SeekComplete Error }'), ('atacorrecterror' , 'ata\d+: error=0x\d+ { UncorrectableError }'), + ('sdXerror' , 'sd\w: Current: sense key: Medium Error'), ('ext3error' , 'EXT3-fs error (device dm-\d+): ext3_find_entry: reading directory #\d+ offset \d+'), + ('floppytimeout','floppy0: floppy timeout called'), ('floppyerror', 'end_request: I/O error, dev fd\w+, sector \d+'), + # hda: dma_intr: status=0x51 { DriveReady SeekComplete Error } + # hda: dma_intr: error=0x40 { UncorrectableError }, LBAsect=23331263, sector=23331263 + # floppy0: floppy timeout called # end_request: I/O error, dev fd0, sector 0 - #Buffer I/O error on device dm-2, logical block 8888896 - #ata1: status=0x51 { DriveReady SeekComplete Error } - #ata1: error=0x40 { UncorrectableError } - #SCSI error : <0 0 0 0> return code = 0x8000002 - #sda: Current: sense key: Medium Error + # Buffer I/O error on device dm-2, logical block 8888896 + # ata1: status=0x51 { DriveReady SeekComplete Error } + # ata1: error=0x40 { UncorrectableError } + # SCSI error : <0 0 0 0> return code = 0x8000002 + # sda: Current: sense key: Medium Error # Additional sense: Unrecovered read error - auto reallocate failed - #SCSI error : <0 2 0 0> return code = 0x40001 - #end_request: I/O error, dev sda, sector 572489600 + # SCSI error : <0 2 0 0> return code = 0x40001 + # end_request: I/O error, dev sda, sector 572489600 ] id = index_to_id(steps, child.expect( steps_to_list(steps) + [ pexpect.EOF ])) sequence.append(id) @@ -445,8 +453,8 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.baddisk[1] % args, True, db='hardware_persistmessages') loginbase = plc.siteId(hostname) - m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase]) - conn.set_nodestate('diag') + m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) + conn.set_nodestate('disable') return False print "...Downloading bm.log from %s" % node @@ -607,11 +615,14 @@ def reboot(hostname, config=None, forced_action=None): # update_node_config_email for n in ["bminit-cfg-exception-nocfg-update-bootupdatefail-nonode-debug-done", - "bminit-cfg-exception-update-bootupdatefail-nonode-debug-done", + "bminit-cfg-exception-update-bootupdatefail-nonode-debug-done", + "bminit-cfg-auth-bootcheckfail-nonode-exception-update-bootupdatefail-nonode-debug-done", ]: sequences.update({n : "update_node_config_email"}) - for n in [ "bminit-cfg-exception-nodehostname-update-debug-done", ]: + for n in [ "bminit-cfg-exception-nodehostname-update-debug-done", + "bminit-cfg-update-exception-nodehostname-update-debug-done", + ]: sequences.update({n : "nodenetwork_email"}) # update_bootcd_email @@ -635,7 +646,11 @@ def reboot(hostname, config=None, forced_action=None): sequences.update({"bminit-cfg-auth-getplc-update-hardware-exception-hardwarerequirefail-update-debug-done" : "broken_hardware_email"}) # bad_dns_email - sequences.update({"bminit-cfg-update-implementerror-bootupdatefail-dnserror-update-implementerror-bootupdatefail-dnserror-done" : "bad_dns_email"}) + for n in [ + "bminit-cfg-update-implementerror-bootupdatefail-dnserror-update-implementerror-bootupdatefail-dnserror-done", + "bminit-cfg-auth-implementerror-bootcheckfail-dnserror-update-implementerror-bootupdatefail-dnserror-done", + ]: + sequences.update( { n : "bad_dns_email"}) flag_set = True @@ -700,9 +715,9 @@ def reboot(hostname, config=None, forced_action=None): m = PersistMessage(hostname, mailtxt.plnode_cfg[0] % args, mailtxt.plnode_cfg[1] % args, True, db='nodeid_persistmessages') loginbase = plc.siteId(hostname) - m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase]) + m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) conn.dump_plconf_file() - conn.set_nodestate('diag') + conn.set_nodestate('disable') elif sequences[s] == "nodenetwork_email": print "...Sending message to LOOK AT NODE NETWORK" @@ -712,9 +727,9 @@ def reboot(hostname, config=None, forced_action=None): m = PersistMessage(hostname, mailtxt.plnode_network[0] % args, mailtxt.plnode_cfg[1] % args, True, db='nodenet_persistmessages') loginbase = plc.siteId(hostname) - m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase]) + m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) conn.dump_plconf_file() - conn.set_nodestate('diag') + conn.set_nodestate('disable') elif sequences[s] == "update_bootcd_email": print "...NOTIFY OWNER TO UPDATE BOOTCD!!!" @@ -727,7 +742,7 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.newalphacd_one[1] % args, True, db='bootcd_persistmessages') loginbase = plc.siteId(hostname) - m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase]) + m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) print "\tDisabling %s due to out-of-date BOOTCD" % hostname conn.set_nodestate('disable') @@ -745,7 +760,7 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.baddisk[1] % args, True, db='hardware_persistmessages') loginbase = plc.siteId(hostname) - m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase]) + m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) conn.set_nodestate('disable') elif sequences[s] == "update_hardware_email": @@ -757,7 +772,7 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.minimalhardware[1] % args, True, db='minhardware_persistmessages') loginbase = plc.siteId(hostname) - m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase]) + m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) conn.set_nodestate('disable') elif sequences[s] == "bad_dns_email": @@ -780,7 +795,7 @@ def reboot(hostname, config=None, forced_action=None): mailtxt.baddns[1] % args, True, db='baddns_persistmessages') loginbase = plc.siteId(hostname) - m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase]) + m.send([const.PIEMAIL % loginbase, const.TECHEMAIL % loginbase]) conn.set_nodestate('disable') if flag_set: @@ -793,10 +808,11 @@ def reboot(hostname, config=None, forced_action=None): # MAIN ------------------------------------------------------------------- def main(): - from config import config - from optparse import OptionParser - parser = OptionParser() - parser.set_defaults(node=None, nodelist=None, child=False, collect=False, nosetup=False, verbose=False, force=None, quiet=False) + import parser as parsermodule + parser = parsermodule.getParser() + + parser.set_defaults(child=False, collect=False, nosetup=False, verbose=False, + force=None, quiet=False) parser.add_option("", "--child", dest="child", action="store_true", help="This is the child mode of this process.") parser.add_option("", "--force", dest="force", metavar="boot_state", @@ -811,12 +827,9 @@ def main(): help="No action, just collect dmesg, and bm.log") parser.add_option("", "--nosetup", dest="nosetup", action="store_true", help="Do not perform the orginary setup phase.") - parser.add_option("", "--node", dest="node", metavar="nodename.edu", - help="A single node name to try to bring out of debug mode.") - parser.add_option("", "--nodelist", dest="nodelist", metavar="nodelist.txt", - help="A list of nodes to bring out of debug mode.") - config = config(parser) - config.parse_args() + + parser = parsermodule.getParser(['nodesets', 'defaults'], parser) + config = parsermodule.parse_args(parser) if config.nodelist: nodes = config.getListFromFile(config.nodelist)