X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=grouprins.py;h=b85bbadd55c3dae8fb86c04c796ba2a588d70950;hb=refs%2Fheads%2F1.0;hp=97ba05b687297539d9c0e2fb30c8a72b762cc40d;hpb=c9b0045bba8ab66adf5036f9cac7f37f476b9a69;p=monitor.git diff --git a/grouprins.py b/grouprins.py index 97ba05b..b85bbad 100755 --- a/grouprins.py +++ b/grouprins.py @@ -67,12 +67,20 @@ class Reboot(object): #pflags.resetRecentFlag('pcutried') if not pflags.getRecentFlag('pcutried'): try: - print "CALLING REBOOT!!!" - ret = reboot.reboot(host) + node_pf = PersistFlags(host, 1, db='node_persistflags') + if node_pf.checkattr('last_change') and \ + node_pf.last_change < time.time() - 60*60*24 and \ + node_pf.checkattr('status') and \ + node_pf.status != "good": - pflags.setRecentFlag('pcutried') - pflags.save() - return ret + print "CALLING REBOOT!!!" + ret = reboot.reboot(host) + + pflags.setRecentFlag('pcutried') + pflags.save() + return ret + else: + return True except Exception,e: email_exception() @@ -88,14 +96,26 @@ class Reboot(object): elif not pflags.getRecentFlag('pcu_rins_tried'): try: - # set node to 'rins' boot state. - print "CALLING REBOOT +++ RINS" - plc.nodeBootState(host, 'rins') - ret = reboot.reboot(host) - - pflags.setRecentFlag('pcu_rins_tried') - pflags.save() - return ret + # NOTE: check that the node has been down for at least a + # day before rebooting it. this avoids false-reboots/rins + # from failed node detections. circa 03-12-09 + node_pf = PersistFlags(host, 1, db='node_persistflags') + if node_pf.checkattr('last_change') and \ + node_pf.last_change < time.time() - 60*60*24 and \ + node_pf.checkattr('status') and \ + node_pf.status != "good": + + # set node to 'rins' boot state. + print "CALLING REBOOT +++ RINS" + plc.nodeBootState(host, 'reinstall') + ret = reboot.reboot(host) + + pflags.setRecentFlag('pcu_rins_tried') + pflags.save() + return ret + + else: + return True except Exception,e: email_exception() @@ -169,11 +189,11 @@ def set_node_to_rins(host, fb): node = api.GetNodes(host, ['boot_state', 'last_contact', 'last_updated', 'date_created']) record = {'observation' : node[0], 'model' : 'USER_REQUEST', - 'action' : 'api.UpdateNode(%s, {"boot_state" : "rins"})' % host, + 'action' : 'api.UpdateNode(%s, {"boot_state" : "reinstall"})' % host, 'time' : time.time()} l = Log(host, record) - ret = api.UpdateNode(host, {'boot_state' : 'rins'}) + ret = api.UpdateNode(host, {'boot_state' : 'reinstall'}) if ret: # it's nice to see the current status rather than the previous status on the console node = api.GetNodes(host)[0] @@ -227,7 +247,7 @@ config = parsermodule.parse_args(parser) # COLLECT nodegroups, nodes and node lists if config.nodegroup: - ng = api.GetNodeGroups({'name' : config.nodegroup}) + ng = api.GetNodeGroups({'groupname' : config.nodegroup}) nodelist = api.GetNodes(ng[0]['node_ids']) hostnames = [ n['hostname'] for n in nodelist ]