#!/usr/bin/python
import plc
-import auth
-api = plc.PLC(auth.auth, auth.plc)
-
-import soltesz
-fb = soltesz.dbLoad("findbad")
-act_all = soltesz.dbLoad("act_all")
+api = plc.getAuthAPI()
+from monitor import *
+#import database
import reboot
import time
from model import *
from nodecommon import *
+from unified_model import node_end_record, PersistFlags
+
+import util.file
-from config import config
-from optparse import OptionParser
+import parser as parsermodule
-parser = OptionParser()
-parser.set_defaults(node=None, endrecord=False)
+parser = parsermodule.getParser()
+parser.set_defaults(node=None,
+ findbad=False,
+ endrecord=False)
parser.add_option("", "--node", dest="node", metavar="nodename.edu",
help="A single node name to add to the nodegroup")
parser.add_option("", "--endrecord", dest="endrecord", action="store_true",
help="Force an end to the action record; to prompt Montior to start messaging again.")
+parser.add_option("", "--findbad", dest="findbad", action="store_true",
+ help="Re-run findbad on the nodes we're going to check before acting.")
parser.add_option("", "--bootcd", dest="bootcd", action="store_true",
help="A stock help message for fetching a new BootCD from the PLC GUI.")
-config = config(parser)
-config.parse_args()
-
-def diff_time(timestamp):
- now = time.time()
- if timestamp == None:
- return "unknown"
- diff = now - timestamp
- # return the number of seconds as a difference from current time.
- t_str = ""
- if diff < 60: # sec in min.
- t = diff
- t_str = "%s sec ago" % t
- elif diff < 60*60: # sec in hour
- t = diff // (60)
- t_str = "%s min ago" % int(t)
- elif diff < 60*60*24: # sec in day
- t = diff // (60*60)
- t_str = "%s hours ago" % int(t)
- elif diff < 60*60*24*7: # sec in week
- t = diff // (60*60*24)
- t_str = "%s days ago" % int(t)
- elif diff < 60*60*24*30: # approx sec in month
- t = diff // (60*60*24*7)
- t_str = "%s weeks ago" % int(t)
- elif diff > 60*60*24*30: # approx sec in month
- t = diff // (60*60*24*7*30)
- t_str = "%s months ago" % int(t)
- return t_str
+config = parsermodule.parse_args(parser)
+
def plc_print_nodeinfo(plcnode):
url = "https://www.planet-lab.org/db/nodes/index.php?nodepattern="
diff_time(plcnode['last_contact']), plcnode['key'])
def fb_print_nodeinfo(fbnode):
+ pf = PersistFlags(fbnode['hostname'], 1, db='node_persistflags')
+ fbnode['last_change'] = diff_time(pf.last_changed)
print " Checked: ",
if 'checked' in fbnode:
print "%11.11s " % diff_time(fbnode['checked'])
else:
print "Unknown"
- print "\t state | ssh | pcu | bootcd | category | kernel"
+ print "\t state | ssh | pcu | bootcd | category | last change | kernel"
if fbnode['bootcd']:
fbnode['bootcd'] = fbnode['bootcd'].split()[-1]
else:
fbnode['state'] = color_boot_state(get_current_state(fbnode))
else:
fbnode['state'] = "none"
- fbnode['kernel'] = fbnode['kernel'].split()[2]
- print "\t %(state)5s | %(ssh)5.5s | %(pcu)5.5s | %(bootcd)6.6s | %(category)8.8s | %(kernel)s" % fbnode
+ if len(fbnode['kernel'].split()) > 2:
+ fbnode['kernel'] = fbnode['kernel'].split()[2]
+ print "\t %(state)5s | %(ssh)5.5s | %(pcu)5.5s | %(bootcd)6.6s | %(category)8.8s | %(last_change)11s | %(kernel)s" % fbnode
def act_print_nodeinfo(actnode, header):
if header[0]:
if 'rt' in actnode and 'Status' in actnode['rt']:
print "\t %5.5s %5.5s | %8.8s | %15.15s | %s" % \
(actnode['rt']['Status'], actnode['rt']['id'][7:],
- actnode['category'], actnode['action'][0],
- actnode['msg_format'][:-1])
+ actnode['category'], actnode['action'][0], actnode['info'][1:])
else:
if type(actnode['action']) == type([]):
action = actnode['action'][0]
print "\t telnet %s" % (reboot.pcu_name(pcuinfo))
if pcuinfo['portstatus']['80'] == "open" or \
pcuinfo['portstatus']['443'] == "open":
- print "\t http://%s" % (reboot.pcu_name(pcuinfo))
+ print "\t https://%s" % (reboot.pcu_name(pcuinfo))
+ print "\t import %s.png" % (reboot.pcu_name(pcuinfo))
+ print """\t mutt -s "crash for %s" -a %s.png sapanb@cs.princeton.edu < /dev/null""" % (hostname, reboot.pcu_name(pcuinfo))
if pcuinfo['portstatus']['443'] == "open":
print "\t racadm.py -r %s -u %s -p '%s'" % (pcuinfo['ip'], pcuinfo['username'], pcuinfo['password'])
print "\t cmdhttps/locfg.pl -s %s -f iloxml/Reset_Server.xml -u %s -p '%s' | grep MESSAGE" % \
(reboot.pcu_name(pcuinfo), pcuinfo['username'], pcuinfo['password'])
+ print "\t cmdhttps/locfg.pl -s %s -f iloxml/License.xml -u %s -p '%s' | grep MESSAGE" % \
+ (reboot.pcu_name(pcuinfo), pcuinfo['username'], pcuinfo['password'])
+ if pcuinfo['portstatus']['16992'] == "open":
+ print "\t ./cmdamt/remoteControl -A -verbose 'http://%s:16992/RemoteControlService' -user admin -pass '%s'" % (reboot.pcu_name(pcuinfo), pcuinfo['password'])
+
+if config.findbad:
+ # Re-run findbad on the nodes listed in config.args before reporting on them.
+ import os
+ file = "findbad.txt"
+ util.file.setFileFromList(file, config.args)
+ os.system("./findbad.py --cachenodes --debug=0 --dbname=findbad --increment --nodelist %s" % file)
for node in config.args:
config.node = node
+ fb = database.dbLoad("findbad")
plc_nodeinfo = api.GetNodes({'hostname': config.node}, None)[0]
fb_nodeinfo = fb['nodes'][config.node]['values']
plc_print_nodeinfo(plc_nodeinfo)
+ fb_nodeinfo['hostname'] = node
fb_print_nodeinfo(fb_nodeinfo)
if fb_nodeinfo['pcu'] == "PCU":
pcu = reboot.get_pcu_values(fb_nodeinfo['plcnode']['pcu_ids'][0])
- pcu_print_info(pcu, config.node)
+ if pcu: pcu_print_info(pcu, config.node)
+ try:
+ act_all = database.dbLoad("act_all")
+ except:
+ act_all = {}
if config.node in act_all and len(act_all[config.node]) > 0:
header = [True]
#rec['stage'] = "monitor-end-record"
#rec['time'] = time.time() - 7*60*60*24
#act_all[config.node].insert(0,rec)
- #soltesz.dbDump("act_all", act_all)
+ #database.dbDump("act_all", act_all)
for act_nodeinfo in act_all[config.node]:
act_print_nodeinfo(act_nodeinfo, header)