merge from:
[monitor.git] / unified_model.py
1 #!/usr/bin/python
2
3 from monitor import database
4
5 from monitor.wrapper import plc
6 from monitor.wrapper import mailer
7 import time
8
9 from model import *
10 from monitor.const import *
11 from monitor import util
12 from monitor import config
13
def gethostlist(hostlist_file):
    """Return the list read from ``hostlist_file``.

    Delegates directly to ``util.file.getListFromFile``.
    """
    hosts = util.file.getListFromFile(hostlist_file)
    return hosts
16
def array_to_priority_map(array):
    """Map each entry of ``array`` to its position in the array.

    The position acts as the entry's priority, which is the form expected
    by cmpValMap().  When an entry occurs more than once, its last
    position wins.
    """
    priorities = {}
    for position, entry in enumerate(array):
        priorities[entry] = position
    return priorities
27
def cmpValMap(v1, v2, map):
    """Compare ``v1`` and ``v2`` by their priorities in ``map``.

    Returns 1 when ``v1`` has the smaller priority value, -1 when it has
    the larger one and 0 when both are equal.  Raises ``Exception`` when
    either value is not a key of ``map``.
    """
    if v1 not in map or v2 not in map:
        raise Exception("No index %s or %s in map" % (v1, v2))

    first, second = map[v1], map[v2]
    if first < second:
        return 1
    if first > second:
        return -1
    return 0
37
def cmpCategoryVal(v1, v2):
    """Compare two category values using the fixed category ordering.

    The ordering is turned into a priority map and the comparison itself
    is done by cmpValMap(), whose result is returned unchanged.
    """
    category_order = [None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR']
    return cmpValMap(v1, v2, array_to_priority_map(category_order))
41
42
class PCU:
    """Placeholder for a host's power control unit.

    Every operation is a stub: the query/command methods report success
    unconditionally and setValidMapping() does nothing.
    """

    def __init__(self, hostname):
        self.hostname = hostname

    def reboot(self):
        """Stub: always reports success."""
        return True

    def available(self):
        """Stub: always reports the unit as available."""
        return True

    def previous_attempt(self):
        """Stub: always returns True."""
        return True

    def setValidMapping(self):
        """Stub: intentionally does nothing."""
        return None
55
class Penalty:
    """Placeholder for a single penalty; the constructor stores nothing."""

    def __init__(self, key, valuepattern, action):
        # Not implemented yet: all three arguments are currently ignored.
        return None
59
class PenaltyMap:
    """Placeholder for a map of penalties; nothing is implemented yet."""

    def __init__(self):
        return None

    # Design note: connect one penalty to another, as in an FSM diagram.
    #       After one condition/penalty is applied, move to the next phase.
66
67
68 #fb = database.dbLoad("findbad")
69
class RT(object):
    """Handle on one RT ticket, implemented on top of the ``mailer`` wrapper.

    ``ticket_id`` may be None until email() creates or returns a ticket;
    ``status`` caches the last value returned by mailer.getTicketStatus().
    """

    def __init__(self, ticket_id = None):
        self.ticket_id = ticket_id
        # Always define the cache so getTicketStatus() is usable even when
        # the object was created without a ticket.
        self.status = None
        if self.ticket_id:
            self.status = mailer.getTicketStatus(self.ticket_id)
            print("getting ticket status %s" % (self.status,))

    def setTicketStatus(self, status):
        """Set the ticket's status, refresh the cached status, return True."""
        mailer.setTicketStatus(self.ticket_id, status)
        self.status = mailer.getTicketStatus(self.ticket_id)
        return True

    def getTicketStatus(self):
        """Return the cached status, fetching it first when it is empty.

        Returns None when there is no ticket to query.
        """
        if not self.status and self.ticket_id:
            self.status = mailer.getTicketStatus(self.ticket_id)
        return self.status

    def closeTicket(self):
        """Close the ticket through RT with the automatic-closure note."""
        mailer.closeTicketViaRT(self.ticket_id, "Ticket CLOSED automatically by SiteAssist.")

    def email(self, subject, body, to):
        """Send mail through RT and remember the ticket id it returns."""
        self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
        return self.ticket_id
94
class Message(object):
    """A subject/body pair that can be sent through RT or by plain mail."""

    def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.via_rt = via_rt
        self.subject, self.message = subject, message
        self.rt = RT(ticket_id)

    def send(self, to):
        """Send to ``to`` and return whatever the chosen transport returns."""
        if not self.via_rt:
            return mailer.email(self.subject, self.message, to)
        return self.rt.email(self.subject, self.message, to)
107
class Recent(object):
    """Track whether an action was taken within the last ``withintime`` seconds.

    ``time`` is the timestamp of the last set/unset and ``action_taken``
    records whether that event was an action.  Both attributes are kept
    if they already exist on the instance when __init__ runs (subclasses
    create or restore them before calling it).
    """

    def __init__(self, withintime):
        self.withintime = withintime

        # Keep a pre-existing timestamp; otherwise pretend the last event
        # happened a week ago.
        if not hasattr(self, 'time'):
            self.time = time.time() - 7*24*60*60

        # isRecent() reads this flag, so it must always exist.
        if not hasattr(self, 'action_taken'):
            self.action_taken = False

    def isRecent(self):
        """Return True if an action was taken and the window is still open.

        Once the window has passed, ``action_taken`` is cleared.
        """
        now = time.time()
        window_end = self.time + self.withintime
        if window_end < now:
            self.action_taken = False

        return bool(window_end > now and self.action_taken)

    def unsetRecent(self):
        """Clear the action flag and restart the clock; returns True."""
        self.action_taken = False
        self.time = time.time()
        return True

    def setRecent(self):
        """Record that an action was taken just now; returns True."""
        self.action_taken = True
        self.time = time.time()
        return True
138                 
class PersistFlags(Recent):
    """Named boolean flags plus Recent tracking, persisted per ``id``.

    Objects live in a dictionary stored with database.dbLoad/dbDump under
    the name given by the ``db`` keyword (default "persistflags").
    Constructing with an id already present in that dictionary returns
    the stored object instead of a new one.
    """

    def __new__(typ, id, *args, **kwargs):
        db = kwargs.pop('db', "persistflags")

        try:
            pm = database.dbLoad(db)
        except Exception:
            # Loading failed: start over with an empty store.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            obj = pm[id]
        else:
            # object.__new__ must not be handed the constructor arguments.
            obj = super(PersistFlags, typ).__new__(typ)
            # Remaining keyword arguments become initial attributes.
            for key, value in kwargs.items():
                setattr(obj, key, value)
            obj.time = time.time()
            obj.action_taken = False

        obj.db = db
        return obj

    def __init__(self, id, withintime, **kwargs):
        self.id = id
        Recent.__init__(self, withintime)

    def save(self):
        """Write this object back into its database under ``self.id``."""
        pm = database.dbLoad(self.db)
        pm[self.id] = self
        database.dbDump(self.db, pm)

    def resetFlag(self, name):
        """Set the flag ``name`` to False."""
        setattr(self, name, False)

    def setFlag(self, name):
        """Set the flag ``name`` to True."""
        setattr(self, name, True)

    def getFlag(self, name):
        """Return the flag ``name``; an unknown flag is created as False."""
        try:
            return getattr(self, name)
        except AttributeError:
            setattr(self, name, False)
            return False

    def resetRecentFlag(self, name):
        """Clear the flag ``name`` and the recent-action marker."""
        self.resetFlag(name)
        self.unsetRecent()

    def setRecentFlag(self, name):
        """Set the flag ``name`` and mark an action as taken now."""
        self.setFlag(name)
        self.setRecent()

    def getRecentFlag(self, name):
        """Return isRecent() combined (with ``&``) with the flag ``name``.

        If that cannot be evaluated, the flag is (re)created as False and
        False is returned.
        """
        try:
            return self.isRecent() & getattr(self, name)
        except (AttributeError, TypeError):
            setattr(self, name, False)
            return False

    def checkattr(self, name):
        """Return True if the attribute ``name`` exists on this object."""
        return hasattr(self, name)
210                 
211
class PersistMessage(Message):
    """A Message persisted per ``id`` and rate-limited by a Recent tracker.

    Objects live in a dictionary stored with database.dbLoad/dbDump under
    the name given by the ``db`` keyword (default "persistmessages").
    Constructing with an id already present returns the stored object;
    __init__ then refreshes its subject, message and RT handle.
    """

    def __new__(typ, id, subject, message, via_rt=True, **kwargs):
        # via_rt defaults to True here as well, matching __init__.
        db = kwargs.get('db', "persistmessages")

        try:
            pm = database.dbLoad(db)
        except Exception:
            # Loading failed: start over with an empty store.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            obj = pm[id]
        else:
            # object.__new__ must not be handed the constructor arguments.
            obj = super(PersistMessage, typ).__new__(typ)
            obj.id = id
            obj.actiontracker = Recent(1*60*60*24)
            obj.ticket_id = None

        # An explicit, non-None ticket id overrides the stored one.
        if kwargs.get('ticket_id') is not None:
            obj.ticket_id = kwargs['ticket_id']

        obj.db = db
        return obj

    def __init__(self, id, subject, message, via_rt=True, **kwargs):
        print("initializing object: %s" % self.ticket_id)
        self.id = id
        Message.__init__(self, subject, message, via_rt, self.ticket_id)

    def reset(self):
        """Reopen the send window so the next send() goes out."""
        self.actiontracker.unsetRecent()

    def save(self):
        """Write this object back into its database under ``self.id``."""
        pm = database.dbLoad(self.db)
        pm[self.id] = self
        database.dbDump(self.db, pm)

    def send(self, to):
        """Send the message unless one was already sent within the window.

        On a real send the returned ticket id is stored, the tracker is
        marked and the object is saved.  Returns None in both cases.
        """
        if not self.actiontracker.isRecent():
            self.ticket_id = Message.send(self, to)
            self.actiontracker.setRecent()
            self.save()
        else:
            # NOTE: at most one message is sent per actiontracker window.
            # NOTE: can cause thank-you messages to be lost, for instance
            #       when a node comes back online within the window.
            print("Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24)))
264
class MonitorMessage(object):
    """Per-``id`` object persisted in a database, carrying a site penalty.

    Objects live in a dictionary stored with database.dbLoad/dbDump under
    the name given by the ``db`` keyword (default "monitormessages").
    Passing ``reset=True`` empties that store first.  Constructing with an
    id already present returns the stored object.
    """

    def __new__(typ, id, *args, **kwargs):
        db = kwargs.get('db', "monitormessages")

        try:
            if kwargs.get('reset') == True:
                database.dbDump(db, {})
            pm = database.dbLoad(db)
        except Exception:
            # Loading failed: start over with an empty store.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            print("Using existing object")
            obj = pm[id]
        else:
            print("creating new object")
            # super() must be anchored on this class (not on ``object``),
            # and object.__new__ takes no constructor arguments.
            obj = super(MonitorMessage, typ).__new__(typ)
            obj.id = id
            obj.sp = PersistSitePenalty(id, 0)

        obj.db = db
        return obj

    def __init__(self, id, message, **kwargs):
        # Nothing to initialise; **kwargs absorbs the 'db'/'reset' options
        # that __new__ consumes, since Python passes them here as well.
        pass
295                 
296
class SitePenalty(object):
    """Escalating list of penalties that can be applied to a host.

    ``penalty_map`` is ordered from mildest to harshest; ``index`` selects
    the current level.  apply() enables every level up to and including
    ``index`` and disables every level above it.
    """

    penalty_map = [
        { 'name': 'noop',
          'enable'  : lambda host: None,
          'disable' : lambda host: None },
        { 'name': 'nocreate',
          'enable'  : lambda host: plc.removeSliceCreation(host),
          'disable' : lambda host: plc.enableSliceCreation(host) },
        { 'name': 'suspendslices',
          'enable'  : lambda host: plc.suspendSlices(host),
          'disable' : lambda host: plc.enableSlices(host) },
    ]

    # Default level, so a bare instance can be used before anything
    # assigns ``index`` on it.
    index = 0

    def get_penalties(self):
        """Return the names of all known penalties, in escalation order."""
        # TODO: get penalties actually applied to a node from PLC DB.
        return [ n['name'] for n in SitePenalty.penalty_map ]

    def increase(self):
        """Move one level up, stopping at the last level; returns True."""
        self.index = min(self.index + 1, len(SitePenalty.penalty_map) - 1)
        return True

    def decrease(self):
        """Move one level down, stopping at level 0; returns True."""
        self.index = max(self.index - 1, 0)
        return True

    def apply(self, host):
        """Bring ``host`` to the current level.

        Levels above ``index`` are disabled from the harshest downwards,
        then levels 0..``index`` are enabled in ascending order.
        """
        for i in range(len(SitePenalty.penalty_map) - 1, self.index, -1):
            penalty = SitePenalty.penalty_map[i]
            print("\tdisabling %s on %s" % (penalty['name'], host))
            penalty['disable'](host)

        for i in range(0, self.index + 1):
            penalty = SitePenalty.penalty_map[i]
            print("\tapplying %s on %s" % (penalty['name'], host))
            penalty['enable'](host)

        return
334
335
336
class PersistSitePenalty(SitePenalty):
    """A SitePenalty persisted per ``id``.

    Objects live in a dictionary stored with database.dbLoad/dbDump under
    the name given by the ``db`` keyword (default "persistpenalties").
    Passing ``reset=True`` empties that store first.  Constructing with an
    id already present returns the stored object and ignores ``index``.
    """

    def __new__(typ, id, index, **kwargs):
        db = kwargs.get('db', "persistpenalties")

        try:
            if kwargs.get('reset') == True:
                database.dbDump(db, {})
            pm = database.dbLoad(db)
        except Exception:
            # Loading failed: start over with an empty store.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            print("Using existing object")
            obj = pm[id]
        else:
            print("creating new object")
            # object.__new__ must not be handed the constructor arguments.
            obj = super(PersistSitePenalty, typ).__new__(typ)
            obj.id = id
            obj.index = index

        obj.db = db
        return obj

    def __init__(self, id, index, **kwargs):
        # ``index`` is only used by __new__ for brand-new objects.
        self.id = id

    def save(self):
        """Write this object back into its database under ``self.id``."""
        pm = database.dbLoad(self.db)
        pm[self.id] = self
        database.dbDump(self.db, pm)
372
373
class Target:
    """
        Each host has a target set of attributes.  Some may be set manually,
        others are set globally for the preferred target.

        For instance:
            All nodes in the Alpha or Beta group would have constraints like:
                [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
    """
    def __init__(self, constraints):
        self.constraints = constraints

    def verify(self, data):
        """
            Check ``data`` against self.constraints, a list of dicts.

            All key/value pairs of one dict must match (AND); a single
            matching dict is enough (OR).  A pair matches when the
            constraint value is contained in ``data[key]``; a key missing
            from ``data`` is reported and counts as a mismatch.
        """
        any_matched = False
        for constraint in self.constraints:
            all_matched = True
            # Every key is examined, even after a mismatch, so that each
            # missing key gets reported.
            for key in constraint.keys():
                if key not in data:
                    print("missing key %s" % key)
                    all_matched = False
                    continue
                hit = constraint[key] in data[key]
                all_matched = all_matched and hit

            any_matched = any_matched or all_matched

        return any_matched
407
class Record(object):
    """A host name together with its record dictionary ``data``.

    The constructor also loads the "plcdb_hn2lb" database and looks up the
    host's loginbase in it.
    """

    def __init__(self, hostname, data):
        self.hostname = hostname
        self.data = data
        hn2lb = database.dbLoad("plcdb_hn2lb")
        self.plcdb_hn2lb = hn2lb
        self.loginbase = hn2lb[self.hostname]

    def stageIswaitforever(self):
        """Return True if 'waitforever' occurs in the record's stage."""
        return 'waitforever' in self.data['stage']

    def severity(self):
        """Compare the current category with the previous one.

        Returns the result of cmpCategoryVal(category, prev_category).
        """
        current = self.data['category']
        previous = self.data['prev_category']
        return cmpCategoryVal(current, previous)

    def improved(self):
        """Return True if severity() is positive."""
        return self.severity() > 0

    def end_record(self):
        """Run node_end_record() for this host and return its result."""
        return node_end_record(self.hostname)

    def reset_stage(self):
        """Set the stage back to 'findbad'; returns True."""
        self.data['stage'] = 'findbad'
        return True

    def getCategory(self):
        """Return the record's category in lower case."""
        return self.data['category'].lower()

    def getState(self):
        """Return the record's state in lower case."""
        return self.data['state'].lower()

    @classmethod
    def getDaysDown(cls, diag_record):
        """Return a day count derived from ``diag_record``.

        With a usable comon uptime the result is the negated uptime,
        floor-divided into days.  Otherwise it is the number of days since
        the node's last contact, or -1 when there never was one.
        """
        seconds_per_day = 60*60*24
        uptime = diag_record['comonstats']['uptime']
        if uptime != "null" and uptime != "-1":
            # NOTE(review): the minus sign binds before '//', so this
            # floors toward minus infinity (e.g. 100 seconds of uptime
            # gives -1, the same value as "never contacted") -- confirm
            # this is intended.
            return - int(float(uptime)) // seconds_per_day

        last_contact = diag_record['plcnode']['last_contact']
        if last_contact is None:
            # the node has never been up, so give it a break
            return -1
        return (time.time() - last_contact) // seconds_per_day

    @classmethod
    def getStrDaysDown(cls, diag_record):
        """Return a human-readable up/down description for ``diag_record``."""
        seconds_per_day = 60*60*24
        plcnode = diag_record['plcnode']
        last_contact = plcnode['last_contact']
        date_created = plcnode['date_created']
        uptime = diag_record['comonstats']['uptime']

        if uptime != "null" and uptime != "-1":
            return "%d days up" % (int(float(uptime)) // seconds_per_day)

        if last_contact is None:
            if date_created is None:
                return "Never contacted PLC"
            age = (time.time() - date_created) // seconds_per_day
            return "Never contacted PLC, created %s days ago" % age

        down = (time.time() - last_contact) // seconds_per_day
        return "%s days down" % down

    def takeAction(self, index=0):
        """Adjust, apply and save the persistent penalty of this host."""
        pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
        stage = self.data['stage']
        recovering = ('improvement' in stage or self.improved()
                      or 'monitor-end-record' in stage)
        if recovering:
            print("takeAction: decreasing penalty for %s" % self.hostname)
            pp.decrease()
            pp.decrease()
        else:
            print("takeAction: increasing penalty for %s" % self.hostname)
            pp.increase()
        # NOTE(review): this assignment replaces the level computed just
        # above with the ``index`` argument -- confirm this override is
        # deliberate.
        pp.index = index
        pp.apply(self.hostname)
        pp.save()

    def _format_diaginfo(self):
        """Format the record's 'info' triple as one indented text line."""
        info = self.data['info']
        stage = self.data['stage']
        print("FORMAT : STAGE:  %s" % (stage,))
        if stage != 'monitor-end-record':
            return "    %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)

        # In an end record, "ALPHA" is reported as "PROD".
        if info[2] == "ALPHA":
            info = (info[0], info[1], "PROD")
        return "    %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])

    def saveAction(self):
        """Return True if the record has 'save-act-all' set to True."""
        return 'save-act-all' in self.data and self.data['save-act-all'] == True

    def getMessage(self, ticket_id=None):
        """Build the PersistMessage for this record, or return None.

        The record's 'args' are filled in with hostname, loginbase and the
        formatted host line, then substituted into the subject and body
        templates found in data['message'].
        """
        args = self.data['args']
        args['hostname'] = self.hostname
        args['loginbase'] = self.loginbase
        args['hostname_list'] = self._format_diaginfo()

        templates = self.data['message']
        if not templates:
            return None

        message = PersistMessage(self.hostname,
                                 templates[0] % args,
                                 templates[1] % args,
                                 True, db='monitor_persistmessages',
                                 ticket_id=ticket_id)
        if self.data['stage'] == "improvement":
            message.reset()
        return message

    def getContacts(self):
        """Return the list of addresses selected by the record's role mask."""
        roles = self.data['email']

        # Depending on the mail/debug/bcc settings only ADMIN is addressed.
        if not config.mail and not config.debug and config.bcc:
            roles = ADMIN
        if config.mail and config.debug:
            roles = ADMIN

        contacts = []
        if ADMIN & roles:
            contacts.append(config.email)
        if TECH & roles:
            contacts.append(TECHEMAIL % self.loginbase)
        if PI & roles:
            contacts.append(PIEMAIL % self.loginbase)
        if USER & roles:
            slices = plc.slices(self.loginbase)
            if len(slices) >= 1:
                for slice_name in slices:
                    contacts.append(SLICEMAIL % slice_name)
                print("SLIC: %20s : %d slices" % (self.loginbase, len(slices)))
            else:
                print("SLIC: %20s : 0 slices" % self.loginbase)

        return contacts
575
576
class NodeRecord:
    """Work-in-progress per-node record checked against a Target.

    Most methods read ``self.data``, which the constructor does not set
    (the code that loaded it is commented out below); it has to be
    assigned on the instance before those methods are called.
    """

    def __init__(self, hostname, target):
        self.hostname = hostname
        self.ticket = None
        self.target = target
        #if hostname in fb['nodes']:
        #       self.data = fb['nodes'][hostname]['values']
        #else:
        #       raise Exception("Hostname not in scan database")

    def stageIswaitforever(self):
        """Return True if 'waitforever' occurs in the record's stage."""
        return 'waitforever' in self.data['stage']

    def severity(self):
        """Compare the current category with the previous one.

        Returns the result of cmpCategoryVal(category, prev_category).
        """
        category = self.data['category']
        prev_category = self.data['prev_category']
        print("IMPROVED:  %s %s" % (category, prev_category))
        return cmpCategoryVal(category, prev_category)

    def improved(self):
        """Return True if severity() is positive."""
        return self.severity() > 0

    def end_record(self):
        """Run node_end_record() for this host and return its result."""
        return node_end_record(self.hostname)

    def reset_stage(self):
        """Set the stage back to 'findbad'; returns True."""
        self.data['stage'] = 'findbad'
        return True

    def open_tickets(self):
        """Return 1 if the attached ticket's status is 'open', else 0."""
        if self.ticket and self.ticket.status['status'] == 'open':
            return 1
        return 0

    def setIntrospect(self):
        """Stub: intentionally does nothing."""
        pass

    def email_notice(self):
        """Send the message for the current condition to its contacts.

        Returns True after sending, or False when there is no message
        to send (the message hook currently returns None).
        """
        message = self._get_message_for_condition()
        if message is None:
            return False
        message.send(self._get_contacts_for_condition())
        return True

    def close_ticket(self):
        """Close the attached ticket, if there is one."""
        if self.ticket:
            self.ticket.closeTicket()

    def exempt_from_penalties(self):
        """Return True if this host is listed in the "l_blacklist" database."""
        bl = database.dbLoad("l_blacklist")
        return self.hostname in bl

    def penalties(self):
        """Stub: always returns an empty list."""
        return []

    def escellate_penalty(self):
        """Stub: always returns True."""
        return True

    def reduce_penalty(self):
        """Stub: always returns True."""
        return True

    def atTarget(self):
        """Return the result of verifying ``self.data`` against the target."""
        return self.target.verify(self.data)

    def _get_condition(self):
        """Return the record's category in lower case."""
        return self.data['category'].lower()

    def _get_stage(self):
        """Unfinished: computes the record's age but selects no stage.

        Stage names noted by the original author:
            "improvement"
            "firstnotice_noop"
            "secondnotice_noslicecreation"
            "thirdnotice_disableslices"
        """
        # The original referenced an undefined name 'current_time'.
        # TODO: map the age onto one of the stages listed above.
        delta = time.time() - self.data['time']

    def _get_message_for_condition(self):
        """Hook, not implemented: returns None."""
        pass

    def _get_contacts_for_condition(self):
        """Hook, not implemented: returns None."""
        pass
655
class Action(MonRecord):
    """A MonRecord that also remembers the host it belongs to."""

    def __init__(self, host, data):
        self.host = host
        MonRecord.__init__(self, data)

    def deltaDays(self, delta):
        """Shift the stored 'time' attribute by ``delta`` days."""
        fields = self.__dict__
        shifted = datetime.fromtimestamp(fields['time']) + timedelta(delta)
        fields['time'] = time.mktime(shifted.timetuple())
666                 
def node_end_record(node):
    """Close out the action history of ``node``.

    Returns False when the node has no entries in "act_all" or no entry in
    "monitor_persistmessages".  Otherwise the node's persisted message is
    deleted, a "monitor-end-record" entry based on the newest action is
    put at the front of the node's history, and True is returned.
    """
    act_all = database.dbLoad("act_all")
    if node not in act_all or len(act_all[node]) == 0:
        return False

    pm = database.dbLoad("monitor_persistmessages")
    if node not in pm:
        return False

    print("deleting node record")
    del pm[node]
    database.dbDump("monitor_persistmessages", pm)

    # Start from the newest action and strip the per-incident fields.
    a = Action(node, act_all[node][0])
    for field in ('rt', 'found_rt_ticket', 'second-mail-at-oneweek',
                  'second-mail-at-twoweeks', 'first-found'):
        a.delField(field)

    rec = a.get()
    rec['action'] = ["close_rt"]
    rec['category'] = "ALPHA"       # assume that it's up...
    rec['stage'] = "monitor-end-record"
    rec['ticket_id'] = None
    rec['time'] = time.time() - 7*60*60*24

    act_all[node].insert(0, rec)
    database.dbDump("act_all", act_all)
    return True
702
if __name__ == "__main__":
    # Running the module directly only confirms that it imports cleanly.
    print("loaded")

    # Manual test recipe, kept commented out:
    #r = RT()
    #r.email("test", "body of test message", ['database@cs.princeton.edu'])
    #from emailTxt import mailtxt
    #database.dbDump("persistmessages", {});
    #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah -  days down\n'}
    #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
    #m.send(['soltesz@cs.utk.edu'])
    #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
    # TRICK timer to thinking some time has passed.
    #m.actiontracker.time = time.time() - 6*60*60*24
    #m.send(['soltesz@cs.utk.edu'])