changed 'monitordebug' to failboot
[monitor.git] / nodeinfo.py
1 #!/usr/bin/python
2
3 from monitor.wrapper import plc, plccache
4 api = plc.getAuthAPI()
5
6 from monitor import *
7 from monitor import util
8 from monitor import parser as parsermodule
9
10 from monitor.database.info.model import *
11 from monitor import reboot
12
13 import time
14 from monitor.model import *
15 from monitor.common import *
16 #from monitor.model import node_end_record, PersistFlags
17
# Command-line options for this script.  The parser object and the final
# config come from the project's parser wrapper module; the node names to
# report on are read from config.args by the per-node loop further down.
parser = parsermodule.getParser()
parser.set_defaults(node=None,
                    findbad=False,
                    endrecord=False,
                    bootcd=False)  # explicit default, like the other store_true flags

# NOTE(review): the per-node loop assigns config.node from each entry of
# config.args, so a value given with --node is overwritten before it is read.
# Confirm whether this option (and its help text) is still wanted.
parser.add_option("", "--node", dest="node", metavar="nodename.edu",
                  help="A single node name to add to the nodegroup")
parser.add_option("", "--endrecord", dest="endrecord", action="store_true",
                  help="Force an end to the action record; to prompt Monitor to start messaging again.")
parser.add_option("", "--findbad", dest="findbad", action="store_true",
                  help="Re-run findbad on the nodes we're going to check before acting.")
parser.add_option("", "--bootcd", dest="bootcd", action="store_true",
                  help="A stock help message for fetching a new BootCD from the PLC GUI.")
config = parsermodule.parse_args(parser)
31
32
def plc_print_nodeinfo(plcnode):
        """Print the PLC view of one node.

        plcnode -- mapping that must provide 'hostname', 'boot_state',
                   'date_created', 'last_updated', 'last_contact' and 'key'.

        Output: a "<hostname> <url>" line, a "Checked" line carrying the
        current time (time.ctime()), then a column header and one row with the
        boot state (via color_boot_state()), the three timestamps (each via
        diff_time()) and the node key.

        The mapping is only read.  The node URL is formatted locally instead
        of being stored under plcnode['url'], so the caller's record is not
        modified as a side effect of printing it.
        """
        url = ("https://www.planet-lab.org/db/nodes/index.php?nodepattern="
               + plcnode['hostname'])

        print("%s %s" % (plcnode['hostname'], url))
        print("   Checked: %s" % time.ctime())

        print("\t boot_state |   created   |   updated   | last_contact | key")
        print("\t       %5s | %11.11s | %11.11s | %12s | %s" %
              (color_boot_state(plcnode['boot_state']),
               diff_time(plcnode['date_created']),
               diff_time(plcnode['last_updated']),
               diff_time(plcnode['last_contact']),
               plcnode['key']))
45
def fb_print_nodeinfo(fbnode):
        """Print the findbad view of one node.

        fbnode -- mapping describing the node's findbad record.  'hostname',
                  'ssh_status' and 'observed_category' are required;
                  'checked', 'bootcd', 'state' and 'kernel' are optional.

        Output: a "Checked" line (diff_time() of 'checked', or "Unknown"),
        a column header, and one row with state, ssh status, bootcd version,
        observed category, time of last change and kernel version.

        The display values are derived on a private copy, so the caller's
        mapping is left untouched.
        """
        row = dict(fbnode)

        history = HistoryNodeRecord.get_by(hostname=row['hostname'])
        try:
                row['last_change'] = diff_time(history.last_changed)
        except Exception:
                # Best effort: without a usable history entry (e.g. the lookup
                # returned None) fall back to "now".  Unlike a bare except this
                # lets KeyboardInterrupt / SystemExit propagate.
                row['last_change'] = diff_time(time.time())

        if 'checked' in row:
                print("   Checked:  %11.11s " % diff_time(row['checked']))
        else:
                print("   Checked:  Unknown")

        # The row below has no pcu value, so the header carries no "pcu" label
        # either; otherwise every following label sits over the wrong column.
        print("\t      state |  ssh  | bootcd | category | last change | kernel")

        # Only the last whitespace-separated token of the bootcd string is
        # shown; a missing, empty or blank value becomes "unknown".
        bootcd_fields = (row.get('bootcd') or "").split()
        if bootcd_fields:
                row['bootcd'] = bootcd_fields[-1]
        else:
                row['bootcd'] = "unknown"

        if 'state' in row:
                row['state'] = color_boot_state(get_current_state(row))
        else:
                row['state'] = "none"

        # Only the third whitespace-separated token of the kernel string is
        # shown; a missing/None/short value becomes "".
        kernel_fields = (row.get('kernel') or "").split()
        if len(kernel_fields) > 2:
                row['kernel'] = kernel_fields[2]
        else:
                row['kernel'] = ""

        print("\t       %(state)5s | %(ssh_status)5.5s | %(bootcd)6.6s | %(observed_category)8.8s | %(last_change)11s | %(kernel)s" % row)
71
def act_print_nodeinfo(actnode, header):
        """Print one action record for a node.

        actnode -- mapping for one action record.  'action' is required and
                   may be a single value or a list/tuple (the first element is
                   shown).  Records with an 'rt' mapping containing 'Status'
                   also need rt['id'] and 'info'; all other records need
                   'ticket_id' and may carry 'msg_format'.  'category' is
                   optional everywhere and shown as "none" when missing.
        header  -- one-element list used as a mutable flag.  While header[0]
                   is true the "Created"/"LastTime" lines and the column
                   header are printed first (this requires actnode['time']),
                   and header[0] is then set to False so later calls sharing
                   the list print rows only.
        """
        if header[0]:
                if 'date_created' in actnode:
                        print("   Created: %11.11s" % diff_time(actnode['date_created']))
                print("   LastTime %11.11s" % diff_time(actnode['time']))
                print("\t      RT     | category | action          | msg")
                header[0] = False

        # Normalise once for every branch: a plain string action must not be
        # indexed (that would print only its first character), and an empty
        # list has no first element to show.
        action = actnode['action']
        if isinstance(action, (list, tuple)):
                if action:
                        action = action[0]
                else:
                        action = "none"
        category = actnode.get('category', "none")

        rt = actnode.get('rt')
        if rt and 'Status' in rt:
                # rt['id'] is shown from its 8th character on, and 'info'
                # without its first element.
                print("\t %5.5s %5.5s | %8.8s | %15.15s | %s" %
                      (rt['Status'], rt['id'][7:],
                       category, action, actnode['info'][1:]))
        elif 'msg_format' in actnode:
                # The last character of msg_format is dropped before printing.
                print("\t       %5.5s | %8.8s | %15.15s | %s" %
                      (actnode['ticket_id'],
                       category, action,
                       actnode['msg_format'][:-1]))
        else:
                print("\t       %5.5s | %8.8s | %15.15s" %
                      (actnode['ticket_id'],
                       category, action))
103
def pcu_print_info(pcuinfo, hostname):
        """Print the findbad view of a PCU plus ready-to-paste access commands.

        pcuinfo  -- mapping for the PCU record.  It must contain
                    'plc_pcu_stats' (a mapping merged over the record before
                    printing); after the merge 'username', 'password',
                    'pcu_id', 'model' and an entry keyed by `hostname` are
                    required, 'ip' is needed when port 443 is open, and
                    'checked' / 'port_status' are optional.
        hostname -- node name; used as a key into the merged record and in
                    the suggested mutt command.

        Output: a "Checked" line, a header and one row of credentials, then
        one block of helper commands for each port that 'port_status' reports
        as "open".  Ports missing from 'port_status' are treated as not open.

        The merge happens on a private copy, so the caller's mapping is left
        untouched.
        """
        info = dict(pcuinfo)
        info.update(info['plc_pcu_stats'])

        if 'checked' in info:
                print("   Checked:  %11.11s " % diff_time(info['checked']))
        else:
                print("   Checked:  Unknown")

        print("\t            user   |          password | port | pcu_id | hostname ")

        pcu_name = reboot.pcu_name(info)
        username = info['username']
        password = info['password']

        # NOTE(review): the "port" column is read from info[hostname], i.e.
        # the record is expected to carry an entry keyed by the node's own
        # hostname -- presumably the PCU port that node is attached to; confirm.
        print("\t %17s | %17s | %4s | %6s | %30s | %s" %
              (username, password,
               info[hostname], info['pcu_id'], pcu_name, info['model']))

        # A missing, None or empty port_status simply yields no suggestions.
        ports = info.get('port_status') or {}

        if ports.get('22') == "open":
                print("\t ssh -o PasswordAuthentication=yes -o PubkeyAuthentication=no %s@%s" % (username, pcu_name))
        if ports.get('23') == "open":
                print("\t telnet %s" % (pcu_name))
        if ports.get('80') == "open" or ports.get('443') == "open":
                print("\t https://%s" % (pcu_name))
                print("\t import %s.png" % (pcu_name))
                print("""\t mutt -s "crash for %s" -a %s.png sapanb@cs.princeton.edu < /dev/null""" % (hostname, pcu_name))
        if ports.get('443') == "open":
                print("\t racadm.py -r %s -u %s -p '%s'" % (info['ip'], username, password))
                print("\t cmdhttps/locfg.pl -s %s -f iloxml/Reset_Server.xml -u %s -p '%s' | grep MESSAGE" %
                      (pcu_name, username, password))
                print("\t cmdhttps/locfg.pl -s %s -f iloxml/License.xml -u %s -p '%s' | grep MESSAGE" %
                      (pcu_name, username, password))
        if ports.get('16992') == "open":
                print("\t ./cmdamt/remoteControl -A -verbose 'http://%s:16992/RemoteControlService' -user admin -pass '%s'" % (pcu_name, password))
135
if config.findbad:
        # Re-run findbad for exactly the nodes named on the command line: the
        # names are written to a list file that findbad.py reads back through
        # --nodelist.
        import os
        nodelist_file = "findbad.txt"
        util.file.setFileFromList(nodelist_file, config.args)
        os.system("./findbad.py --cachenodes --debug=0 --dbname=findbad --increment --nodelist %s" % nodelist_file)

# Report on every node named on the command line: PLC record, findbad record,
# PCU record (if the node has one) and the recorded action history.
for node in config.args:
        config.node = node

        plc_nodeinfo = plccache.GetNodeByName(config.node)
        fb_noderec = FindbadNodeRecord.get_latest_by(hostname=node)
        plc_print_nodeinfo(plc_nodeinfo)

        if fb_noderec is not None:
                fb_nodeinfo = fb_noderec.to_dict()
                fb_nodeinfo['hostname'] = node
                fb_print_nodeinfo(fb_nodeinfo)

                # The PCU section is printed only for a positive PCU id.
                pcuid = fb_nodeinfo['plc_pcuid']
                if pcuid is not None and pcuid > 0:
                        pcu = FindbadPCURecord.get_latest_by(plc_pcuid=pcuid)
                        if pcu:
                                pcu_print_info(pcu.to_dict(), config.node)
        else:
                # The lookup came back empty; the PCU lookup below the findbad
                # section is guarded the same way, so skip both sections
                # instead of failing on a missing record.
                print("   No findbad record found for %s" % node)

        # The action history is optional: if it cannot be loaded, carry on
        # with an empty history rather than aborting the report.
        try:
                act_all = database.dbLoad("act_all")
        except Exception:
                act_all = {}

        node_actions = act_all.get(config.node)
        if node_actions:
                header = [True]

                if config.endrecord:
                        # --endrecord: force an end to this node's action
                        # record before listing it.
                        node_end_record(config.node)

                for act_nodeinfo in node_actions:
                        act_print_nodeinfo(act_nodeinfo, header)

        print("")

        if config.bootcd:
                print("""
If you need a new bootcd, the steps are very simple:

Visit:
 * https://www.planet-lab.org/db/nodes/index.php?nodepattern=%s
 * Select Download -> Download ISO image for %s
 * Save the ISO, and burn it to a writable CD-ROM.
 * Replace the old CD and reboot the machine.

Please let me know if you have any additional questions.
""" % (config.node, config.node))
199