Changed 'import auth' statements to use plc.py or monitorconfig.py
[monitor.git] / unified_model.py
1 #!/usr/bin/python
2
3 import database
4
5 import plc
6 api = plc.getAuthAPI()
7
8 import mailer
9 import time
10 from nodecommon import *
11
12 from const import *
13
def gethostlist(hostlist_file):
    """Return the host list read from `hostlist_file`.

    Parsing is delegated to config.getListFromFile(); the config module
    is imported at call time rather than at module load.
    """
    # Alternative kept for reference: ask the PLC API for the node list.
    #   nodes = api.GetNodes({'peer_id' : None}, ['hostname'])
    #   return [ n['hostname'] for n in nodes ]
    import config as config_module
    hosts = config_module.getListFromFile(hostlist_file)
    return hosts
20
def array_to_priority_map(array):
    """Map every entry of `array` to its position in the array.

    The resulting dict is meant to be handed to cmpValMap(), where the
    position acts as a priority.  If an entry occurs more than once, the
    position of its last occurrence is kept.
    """
    return dict((entry, position) for position, entry in enumerate(array))
31
def cmpValMap(v1, v2, map):
    """Compare `v1` and `v2` by their priorities in `map`.

    Returns 1 when v1 has the lower priority value (i.e. appears earlier),
    -1 when it has the higher one, and 0 otherwise.  Raises Exception
    when either value is missing from `map`.
    """
    if v1 not in map or v2 not in map:
        raise Exception("No index %s or %s in map" % (v1, v2))

    first, second = map[v1], map[v2]
    if first < second:
        return 1
    if first > second:
        return -1
    return 0
41
def cmpCategoryVal(v1, v2):
    """Compare two node categories using the fixed category ordering.

    The ordering runs from None through 'ERROR'; the comparison itself
    (including its sign convention) is done by cmpValMap().
    """
    ordered_categories = [ None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ]
    priorities = array_to_priority_map(ordered_categories)
    return cmpValMap(v1, v2, priorities)
45
46
class PCU:
    """Placeholder model of a power control unit, identified by hostname.

    Every query currently reports success; no real PCU is contacted.
    """

    def __init__(self, hostname):
        self.hostname = hostname

    def reboot(self):
        """Stub: always reports success."""
        return True

    def available(self):
        """Stub: always reports True."""
        return True

    def previous_attempt(self):
        """Stub: always reports True."""
        return True

    def setValidMapping(self):
        """Stub: does nothing and returns None."""
        return None
59
class Penalty:
    """Placeholder for a single penalty rule; nothing is stored yet."""

    def __init__(self, key, valuepattern, action):
        """Accept the rule description; the arguments are currently unused."""
        return None
63
class PenaltyMap:
    """Placeholder for a state machine that chains penalties together.

    Intended design: connect one penalty to another as in an FSM diagram,
    so that once one condition/penalty has been applied the next phase
    follows.  Nothing is implemented yet.
    """

    def __init__(self):
        return None
70
71
# The "findbad" database, loaded once at import time.  NodeRecord looks up
# per-host values in fb['nodes'].
fb = database.dbLoad("findbad")
73
class RT(object):
    """Thin wrapper around the RT ticket helpers in the mailer module.

    Attributes:
        ticket_id: id of the ticket this object refers to, or None.
        status:    last status fetched through mailer.getTicketStatus(),
                   or None when nothing has been fetched yet.
    """

    def __init__(self, ticket_id = None):
        self.ticket_id = ticket_id
        # Always define `status`; previously it only existed when a ticket
        # id was given, so getTicketStatus() raised AttributeError otherwise.
        self.status = None
        if self.ticket_id:
            # The trailing comma keeps the Python 2 behaviour of printing the
            # status on the same line as this prefix.
            print("getting ticket status"),
            self.status = mailer.getTicketStatus(self.ticket_id)
            print(self.status)

    def setTicketStatus(self, status):
        """Set the ticket status, then refresh the cached copy. Returns True."""
        mailer.setTicketStatus(self.ticket_id, status)
        self.status = mailer.getTicketStatus(self.ticket_id)
        return True

    def getTicketStatus(self):
        """Return the cached status, fetching it first if none is cached."""
        if not self.status:
            self.status = mailer.getTicketStatus(self.ticket_id)
        return self.status

    def closeTicket(self):
        """Close the ticket through mailer.closeTicketViaRT()."""
        mailer.closeTicketViaRT(self.ticket_id)

    def email(self, subject, body, to):
        """Send a message through RT and remember the ticket id it returns."""
        self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
        return self.ticket_id
98
class Message(object):
    """An email with a subject and body, sent either through RT or directly.

    Extra keyword arguments are accepted and ignored.
    """

    def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
        self.subject = subject
        self.message = message
        self.via_rt = via_rt
        self.rt = RT(ticket_id)

    def send(self, to):
        """Deliver the message to `to` and return the delivery call's result.

        With via_rt set, the message goes through self.rt.email();
        otherwise it is handed to mailer.email().
        """
        if not self.via_rt:
            return mailer.email(self.subject, self.message, to)
        return self.rt.email(self.subject, self.message, to)
111
class Recent(object):
    """Tracks whether an action was taken within the last `withintime` seconds.

    Attributes:
        withintime:   length of the "recent" window, in seconds.
        time:         timestamp of the last setRecent()/unsetRecent() call.
        action_taken: True once setRecent() was called and the window has
                      not yet expired.
    """

    def __init__(self, withintime):
        self.withintime = withintime

        # Subclasses (or unpickled instances) may already carry these
        # attributes before __init__ runs; keep them in that case.
        if not hasattr(self, 'time'):
            # Default to a week ago.
            self.time = time.time() - 7*24*60*60
        if not hasattr(self, 'action_taken'):
            # Previously left undefined, which made isRecent() raise
            # AttributeError whenever the window was longer than a week.
            self.action_taken = False

    def isRecent(self):
        """Return True if an action was recorded and the window is still open.

        As a side effect, an expired window clears `action_taken`.
        """
        now = time.time()
        window_end = self.time + self.withintime
        if window_end < now:
            self.action_taken = False

        return bool(window_end > now and self.action_taken)

    def unsetRecent(self):
        """Clear the action flag and restart the clock. Returns True."""
        self.action_taken = False
        self.time = time.time()
        return True

    def setRecent(self):
        """Record that the action was taken just now. Returns True."""
        self.action_taken = True
        self.time = time.time()
        return True
142                 
class PersistFlags(Recent):
    """A Recent tracker with named boolean flags, persisted by id.

    Instances are looked up in a database (default "persistflags", or the
    one named by the `db` keyword).  If an object is already stored under
    `id` it is returned; otherwise a fresh one is created.  Changes are
    only written back by save().
    """

    def __new__(typ, id, *args, **kwargs):
        db = kwargs.pop('db', "persistflags")

        try:
            pm = database.dbLoad(db)
        except Exception:
            # The database could not be loaded; start it off empty.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            obj = pm[id]
        else:
            # object.__new__ must not be given the constructor arguments.
            obj = super(PersistFlags, typ).__new__(typ)
            # Remaining keyword arguments become initial attributes.
            for key, value in kwargs.items():
                setattr(obj, key, value)
            obj.time = time.time()
            obj.action_taken = False

        obj.db = db
        return obj

    def __init__(self, id, withintime, **kwargs):
        self.id = id
        Recent.__init__(self, withintime)

    def save(self):
        """Write this object back to its database under self.id."""
        pm = database.dbLoad(self.db)
        pm[self.id] = self
        database.dbDump(self.db, pm)

    def resetFlag(self, name):
        """Set flag `name` to False."""
        setattr(self, name, False)

    def setFlag(self, name):
        """Set flag `name` to True."""
        setattr(self, name, True)

    def getFlag(self, name):
        """Return flag `name`; an unknown flag is created as False."""
        try:
            return getattr(self, name)
        except AttributeError:
            setattr(self, name, False)
            return False

    def resetRecentFlag(self, name):
        """Clear flag `name` and the recent-action marker."""
        self.resetFlag(name)
        self.unsetRecent()

    def setRecentFlag(self, name):
        """Set flag `name` and mark the action as taken just now."""
        self.setFlag(name)
        self.setRecent()

    def getRecentFlag(self, name):
        """Return True only if the action is recent AND flag `name` is set.

        An unknown flag is created as False.
        """
        flag = self.getFlag(name)
        # isRecent() is always evaluated: it expires stale windows.
        recent = self.isRecent()
        return bool(recent and flag)

    def checkattr(self, name):
        """Return True if this object has an attribute called `name`."""
        return hasattr(self, name)
214                 
215
class PersistMessage(Message):
    """A Message that is persisted by id and rate-limited.

    Instances are looked up in a database (default "persistmessages", or
    the one named by the `db` keyword).  A newly created object gets an
    `actiontracker` with a three-day window; send() only delivers when
    that tracker does not report a recent send.
    """

    def __new__(typ, id, subject, message, via_rt=True, **kwargs):
        db = kwargs.get('db', "persistmessages")

        try:
            pm = database.dbLoad(db)
        except Exception:
            # The database could not be loaded; start it off empty.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            print("Using existing object")
            obj = pm[id]
        else:
            print("creating new object")
            # object.__new__ must not be given the constructor arguments.
            obj = super(PersistMessage, typ).__new__(typ)
            obj.id = id
            obj.actiontracker = Recent(3*60*60*24)
            obj.ticket_id = None

        # An explicitly supplied ticket id overrides the stored one.
        if kwargs.get('ticket_id') is not None:
            obj.ticket_id = kwargs['ticket_id']

        obj.db = db
        return obj

    def __init__(self, id, subject, message, via_rt=True, **kwargs):
        # self.ticket_id was set up by __new__.
        print("initializing object: %s" % self.ticket_id)
        self.id = id
        Message.__init__(self, subject, message, via_rt, self.ticket_id)

    def reset(self):
        """Forget the last send so the next send() goes out immediately."""
        self.actiontracker.unsetRecent()

    def send(self, to):
        """Send to `to` unless a message went out within the tracker window.

        After a successful send the object is written back to its database.
        """
        if not self.actiontracker.isRecent():
            self.ticket_id = Message.send(self, to)
            self.actiontracker.setRecent()

            # Record the object for persistence.
            pm = database.dbLoad(self.db)
            pm[self.id] = self
            database.dbDump(self.db, pm)
        else:
            # NOTE: only one message is sent per tracker window, regardless.
            window_days = self.actiontracker.withintime // (60*60*24)
            print("Not sending to host b/c not within window of %s days" % window_days)
266
class MonitorMessage(object):
    """A per-id monitor record, looked up in a database on construction.

    The database defaults to "monitormessages" (override with `db`);
    passing reset=True empties it first.  A newly created object gets a
    PersistSitePenalty for the same id in its `sp` attribute.
    """

    def __new__(typ, id, *args, **kwargs):
        db = kwargs.get('db', "monitormessages")

        try:
            if kwargs.get('reset') == True:
                database.dbDump(db, {})
            pm = database.dbLoad(db)
        except Exception:
            # The database could not be loaded; start it off empty.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            print("Using existing object")
            obj = pm[id]
        else:
            print("creating new object")
            # super(object, typ) skipped past `object` in the MRO and so
            # could never reach object.__new__; name this class instead and
            # pass no constructor arguments.
            obj = super(MonitorMessage, typ).__new__(typ)
            obj.id = id
            obj.sp = PersistSitePenalty(id, 0)

        obj.db = db
        return obj

    def __init__(self, id, message, **kwargs):
        # **kwargs lets the `db` / `reset` keywords consumed by __new__
        # through; nothing is initialised here.
        pass
297                 
298
class SitePenalty(object):
    """An escalating ladder of penalties selected by `self.index`.

    `penalty_map` lists the levels in increasing order; each entry has a
    'name' plus 'enable' and 'disable' callables taking a host.  The
    class itself never sets `self.index`; whoever creates the instance
    has to assign it.
    """

    penalty_map = [
        { 'name'    : 'noop',
          'enable'  : lambda host: None,
          'disable' : lambda host: None },
        { 'name'    : 'nocreate',
          'enable'  : lambda host: plc.removeSliceCreation(host),
          'disable' : lambda host: plc.enableSliceCreation(host) },
        { 'name'    : 'suspendslices',
          'enable'  : lambda host: plc.suspendSlices(host),
          'disable' : lambda host: plc.enableSlices(host) },
    ]

    def get_penalties(self):
        """Return the names of all known penalty levels."""
        # TODO: get penalties actually applied to a node from PLC DB.
        names = []
        for level in SitePenalty.penalty_map:
            names.append(level['name'])
        return names

    def increase(self):
        """Move one level up, stopping at the last level. Returns True."""
        top = len(SitePenalty.penalty_map) - 1
        self.index = self.index + 1
        if self.index > top:
            self.index = top
        return True

    def decrease(self):
        """Move one level down, stopping at level 0. Returns True."""
        self.index = self.index - 1
        if self.index < 0:
            self.index = 0
        return True

    def apply(self, host):
        """Disable every level above self.index, then enable levels 0..index."""
        levels = SitePenalty.penalty_map

        # Walk down from the highest level to just above the current one.
        for i in range(len(levels) - 1, self.index, -1):
            level = levels[i]
            print("\tdisabling %s on %s" % (level['name'], host))
            level['disable'](host)

        for i in range(0, self.index + 1):
            level = levels[i]
            print("\tapplying %s on %s" % (level['name'], host))
            level['enable'](host)

        return
336
337
338
class PersistSitePenalty(SitePenalty):
    """A SitePenalty whose level is persisted by id.

    The database defaults to "persistpenalties" (override with `db`);
    passing reset=True empties it first.  If an object is already stored
    under `id` it is returned and `index` is ignored; otherwise a new
    object starting at `index` is created.  Changes are only written back
    by save().
    """

    def __new__(typ, id, index, **kwargs):
        db = kwargs.get('db', "persistpenalties")

        try:
            if kwargs.get('reset') == True:
                database.dbDump(db, {})
            pm = database.dbLoad(db)
        except Exception:
            # The database could not be loaded; start it off empty.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            print("Using existing object")
            obj = pm[id]
        else:
            print("creating new object")
            # object.__new__ must not be given the constructor arguments.
            obj = super(PersistSitePenalty, typ).__new__(typ)
            obj.id = id
            obj.index = index

        obj.db = db
        return obj

    def __init__(self, id, index, **kwargs):
        # `index` is deliberately not applied here: __new__ either restored
        # the stored value or set the initial one.
        self.id = id

    def save(self):
        """Write this object back to its database under self.id."""
        pm = database.dbLoad(self.db)
        pm[self.id] = self
        database.dbDump(self.db, pm)
374
375
class Target:
    """
        The desired attribute set for a host.  Some targets may be set
        manually, others globally as the preferred target.

        For instance:
            All nodes in the Alpha or Beta group would have constraints like:
                [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
    """
    def __init__(self, constraints):
        self.constraints = constraints

    def verify(self, data):
        """
            Check `data` against self.constraints, a list of dicts:
            the entries of one dict are ANDed, the dicts themselves ORed.
            An entry holds when its value is contained in data[key]; a key
            absent from `data` is reported and fails its dict.
        """
        satisfied = False
        for constraint in self.constraints:
            matches = True
            for key in constraint.keys():
                if key not in data:
                    print("missing key %s" % key)
                    matches = False
                else:
                    # Evaluated unconditionally, even after a failed entry.
                    matches = matches & (constraint[key] in data[key])

            satisfied = satisfied | matches

        return satisfied
409
class Record(object):
    """Diagnostic record of one host, plus the actions derived from it.

    Attributes:
        hostname:    the host this record describes.
        data:        dict of diagnostic values; the methods below read the
                     keys 'stage', 'category', 'prev_category', 'state',
                     'info', 'args', 'message' and 'email'.
        plcdb_hn2lb: the "plcdb_hn2lb" database, indexed by hostname.
        loginbase:   plcdb_hn2lb[hostname].
    """

    def __init__(self, hostname, data):
        self.hostname = hostname
        self.data = data
        self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
        self.loginbase = self.plcdb_hn2lb[self.hostname]

    def stageIswaitforever(self):
        """Return True if 'waitforever' occurs in the record's stage."""
        return 'waitforever' in self.data['stage']

    def severity(self):
        """Compare the current category with the previous one.

        Returns the result of cmpCategoryVal(category, prev_category).
        """
        category = self.data['category']
        prev_category = self.data['prev_category']
        return cmpCategoryVal(category, prev_category)

    def improved(self):
        """Return True if severity() is positive."""
        return self.severity() > 0

    def end_record(self):
        """Delegate to node_end_record() for this host."""
        return node_end_record(self.hostname)

    def reset_stage(self):
        """Put the record back into the 'findbad' stage. Returns True."""
        self.data['stage'] = 'findbad'
        return True

    def getCategory(self):
        """Return the category in lower case."""
        return self.data['category'].lower()

    def getState(self):
        """Return the state in lower case."""
        return self.data['state'].lower()

    def getDaysDown(cls, diag_record):
        """Return a day count derived from `diag_record`.

        When comon reports an uptime the result is negative (the node is
        up); otherwise it is the number of whole days since the last PLC
        contact, or -1 if the node never contacted PLC.
        """
        daysdown = -1
        if diag_record['comonstats']['uptime'] != "null":
            # NOTE(review): unary minus binds tighter than //, so this floors
            # the negated uptime (less than a day of uptime yields -1, the
            # same value used for "never contacted").  Left unchanged because
            # callers may rely on it -- confirm the intent.
            daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
        #elif diag_record['comonstats']['sshstatus'] != "null":
        #       daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
        #elif diag_record['comonstats']['lastcotop'] != "null":
        #       daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
        else:
            now = time.time()
            last_contact = diag_record['plcnode']['last_contact']
            if last_contact is None:
                # the node has never been up, so give it a break
                daysdown = -1
            else:
                diff = now - last_contact
                daysdown = diff // (60*60*24)
        return daysdown
    getDaysDown = classmethod(getDaysDown)

    def getStrDaysDown(cls, diag_record):
        """Return a human-readable up/down summary for `diag_record`."""
        daysdown = "unknown"
        last_contact = diag_record['plcnode']['last_contact']
        date_created = diag_record['plcnode']['date_created']
        uptime = diag_record['comonstats']['uptime']

        if uptime != "null" and uptime != "-1":
            daysdown = int(float(uptime)) // (60*60*24)
            daysdown = "%d days up" % daysdown

        elif last_contact is None:
            if date_created is not None:
                diff = time.time() - date_created
                # %d: the floor-divided float would otherwise print as "12.0".
                daysdown = "Never contacted PLC, created %d days ago" % (diff // (60*60*24))
            else:
                daysdown = "Never contacted PLC"
        else:
            diff = time.time() - last_contact
            # %d: the floor-divided float would otherwise print as "3.0".
            daysdown = "%d days down" % (diff // (60*60*24))
        return daysdown
    getStrDaysDown = classmethod(getStrDaysDown)

    #def getStrDaysDown(cls, diag_record):
    #       daysdown = cls.getDaysDown(diag_record)
    #       if daysdown > 0:
    #               return "%d days down"%daysdown
    #       elif daysdown == -1:
    #               return "Never online"
    #       else:
    #               return "%d days up"% -daysdown
    #getStrDaysDown = classmethod(getStrDaysDown)

    def takeAction(self):
        """Lower or raise this host's penalty level, apply it and save it.

        The level is lowered when the stage mentions 'improvement' or the
        category improved; otherwise it is raised.
        """
        pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
        if 'improvement' in self.data['stage'] or self.improved():
            print("decreasing penalty for %s"%self.hostname)
            pp.decrease()
        else:
            print("increasing penalty for %s"%self.hostname)
            pp.increase()
        pp.apply(self.hostname)
        pp.save()

    def _format_diaginfo(self):
        """Format data['info'] (a three-element sequence) as one text line."""
        info = self.data['info']
        if self.data['stage'] == 'monitor-end-record':
            hlist = "    %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
        else:
            hlist = "    %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
        return hlist

    def getMessage(self, ticket_id=None):
        """Build the PersistMessage for this host.

        data['message'] holds the subject and body templates; both are
        filled in from data['args'], which is first extended with the
        hostname, loginbase and formatted diagnostic line.
        """
        args = self.data['args']
        args['hostname'] = self.hostname
        args['loginbase'] = self.loginbase
        args['hostname_list'] = self._format_diaginfo()
        message = PersistMessage(self.hostname,
                                 self.data['message'][0] % args,
                                 self.data['message'][1] % args,
                                 True, db='monitor_persistmessages',
                                 ticket_id=ticket_id)
        return message

    def getContacts(self):
        """Return the list of email addresses to notify for this record.

        data['email'] is a bitmask of ADMIN / TECH / PI / USER roles; some
        combinations of the config mail/debug/bcc switches force it to
        ADMIN only.
        """
        # Imported here, as in the rest of this file; bound to distinct
        # local names so the class is not shadowed by its own instance.
        from config import config as config_class
        cfg = config_class()

        roles = self.data['email']

        if not cfg.mail and not cfg.debug and cfg.bcc:
            roles = ADMIN
        if cfg.mail and cfg.debug:
            roles = ADMIN

        # build targets
        contacts = []
        if ADMIN & roles:
            contacts += [cfg.email]
        if TECH & roles:
            contacts += [TECHEMAIL % self.loginbase]
        if PI & roles:
            contacts += [PIEMAIL % self.loginbase]
        if USER & roles:
            slices = plc.slices(self.loginbase)
            if len(slices) >= 1:
                for slice in slices:
                    contacts += [SLICEMAIL % slice]
                print("SLIC: %20s : %d slices" % (self.loginbase, len(slices)))
            else:
                print("SLIC: %20s : 0 slices" % self.loginbase)

        return contacts
564
565
class NodeRecord:
    """Scan-database view of one node, measured against a Target.

    The node's values are taken from fb['nodes'][hostname]['values'];
    construction fails if the hostname is not in the scan database.
    Several methods are still stubs.
    """

    def __init__(self, hostname, target):
        self.hostname = hostname
        self.ticket = None
        self.target = target
        if hostname not in fb['nodes']:
            raise Exception("Hostname not in scan database")
        self.data = fb['nodes'][hostname]['values']

    def stageIswaitforever(self):
        """Return True if 'waitforever' occurs in the node's stage."""
        return 'waitforever' in self.data['stage']

    def severity(self):
        """Return cmpCategoryVal(category, prev_category) for this node."""
        current = self.data['category']
        previous = self.data['prev_category']
        return cmpCategoryVal(current, previous)

    def improved(self):
        """Return True if severity() is positive."""
        return self.severity() > 0

    def end_record(self):
        """Delegate to node_end_record() for this host."""
        return node_end_record(self.hostname)

    def reset_stage(self):
        """Put the node back into the 'findbad' stage. Returns True."""
        self.data['stage'] = 'findbad'
        return True

    def open_tickets(self):
        """Return 1 if a ticket is attached and its status is 'open', else 0."""
        ticket = self.ticket
        if not ticket:
            return 0
        if ticket.status['status'] == 'open':
            return 1
        return 0

    def setIntrospect(self):
        """Stub: does nothing."""
        pass

    def email_notice(self):
        """Send the message for the current condition to its contacts."""
        notice = self._get_message_for_condition()
        recipients = self._get_contacts_for_condition()
        notice.send(recipients)
        return True

    def close_ticket(self):
        """Close the attached ticket, if there is one."""
        if not self.ticket:
            return
        self.ticket.closeTicket()

    def exempt_from_penalties(self):
        """Return True if this host is listed in the "l_blacklist" database."""
        blacklist = database.dbLoad("l_blacklist")
        return self.hostname in blacklist

    def penalties(self):
        """Stub: always an empty list."""
        return []

    def escellate_penalty(self):
        """Stub: always True."""
        return True

    def reduce_penalty(self):
        """Stub: always True."""
        return True

    def atTarget(self):
        """Return whether the node's data satisfies the target."""
        return self.target.verify(self.data)

    def _get_condition(self):
        """Return the node's category in lower case."""
        return self.data['category'].lower()

    def _get_stage(self):
        "improvement"
        # Other stage names noted by the original author:
        #   firstnotice_noop
        #   secondnotice_noslicecreation
        #   thirdnotice_disableslices
        #
        # NOTE(review): unfinished -- the elapsed time is computed but never
        # used or returned, and `current_time` is not defined in this file
        # (it may come from one of the star imports; confirm).
        delta = current_time - self.data['time']

    def _get_message_for_condition(self):
        """Stub: returns None."""
        pass

    def _get_contacts_for_condition(self):
        """Stub: returns None."""
        pass
643
if __name__ == "__main__":
    # Running the module directly only confirms that it loads.
    print("loaded")

    # Manual experiments kept for reference:
    #r = RT()
    #r.email("test", "body of test message", ['database@cs.princeton.edu'])
    #from emailTxt import mailtxt
    #database.dbDump("persistmessages", {});
    #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah -  days down\n'}
    #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
    #m.send(['soltesz@cs.utk.edu'])
    #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
    # TRICK timer to thinking some time has passed.
    #m.actiontracker.time = time.time() - 6*60*60*24
    #m.send(['soltesz@cs.utk.edu'])