f070a599d4ff10a1a542414fba69642d296ee98c
[monitor.git] / unified_model.py
1 #!/usr/bin/python
2
3 from monitor import database
4
5 from monitor.wrapper import plc
6 from monitor.wrapper import mailer
7 import time
8
9 from model import *
10 from monitor.const import *
11 from monitor import util
12 from monitor import config
13
def gethostlist(hostlist_file):
        """Return whatever util.file.getListFromFile() yields for hostlist_file."""
        hosts = util.file.getListFromFile(hostlist_file)
        return hosts
16
def array_to_priority_map(array):
        """Create a mapping where each entry of array is given a priority equal
        to its position in the array.  This is useful for subsequent use in the
        cmpValMap() function.

        If an entry occurs more than once, its last position wins.  An empty
        array yields an empty dict.
        """
        # enumerate() replaces the manual counter; the local no longer shadows
        # the builtin map().
        return dict((entry, position) for position, entry in enumerate(array))
27
def cmpValMap(v1, v2, map):
        """Compare v1 and v2 by the values that map assigns to them.

        Returns 1 when map[v1] is smaller than map[v2], -1 when it is larger
        and 0 otherwise.  Raises Exception when either value is not a key of
        map.
        """
        if v1 not in map or v2 not in map:
                raise Exception("No index %s or %s in map" % (v1, v2))

        first, second = map[v1], map[v2]
        if first < second:
                return 1
        if first > second:
                return -1
        return 0
37
def cmpCategoryVal(v1, v2):
        """Compare two category values using the fixed ordering listed below.

        The categories are turned into a priority map by position and the
        comparison itself is delegated to cmpValMap().
        """
        ordered_categories = [ None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ]
        priorities = array_to_priority_map(ordered_categories)
        return cmpValMap(v1, v2, priorities)
41
42
class PCU:
        """Stub PCU object identified by a hostname; every operation is a placeholder."""

        def __init__(self, hostname):
                self.hostname = hostname

        def reboot(self):
                # Placeholder: always reports True.
                return True

        def available(self):
                # Placeholder: always reports True.
                return True

        def previous_attempt(self):
                # Placeholder: always reports True.
                return True

        def setValidMapping(self):
                # Placeholder: does nothing.
                return None
55
class Penalty:
        """Placeholder: the constructor accepts its arguments and stores nothing."""

        def __init__(self, key, valuepattern, action):
                return None
59
class PenaltyMap:
        """Placeholder class; nothing is implemented yet."""

        def __init__(self):
                return None

        # Design note: connect one penalty to another, as in an FSM diagram.
        #       After one condition/penalty is applied, move to the next phase.

66
67
68 #fb = database.dbLoad("findbad")
69
70 class RT(object):
71         def __init__(self, ticket_id = None):
72                 self.ticket_id = ticket_id
73                 if self.ticket_id:
74                         print "getting ticket status",
75                         self.status = mailer.getTicketStatus(self.ticket_id)
76                         print self.status
77
78         def setTicketStatus(self, status):
79                 mailer.setTicketStatus(self.ticket_id, status)
80                 self.status = mailer.getTicketStatus(self.ticket_id)
81                 return True
82         
83         def getTicketStatus(self):
84                 if not self.status:
85                         self.status = mailer.getTicketStatus(self.ticket_id)
86                 return self.status
87
88         def closeTicket(self):
89                 mailer.closeTicketViaRT(self.ticket_id, "Ticket CLOSED automatically by SiteAssist.") 
90
91         def email(self, subject, body, to):
92                 self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
93                 return self.ticket_id
94
class Message(object):
        """A subject/body pair that is sent either through RT or via mailer.email().

        Extra keyword arguments to the constructor are accepted and ignored.
        """

        def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
                self.via_rt = via_rt
                self.subject, self.message = subject, message
                self.rt = RT(ticket_id)

        def send(self, to):
                """Send the message to `to` and return what the chosen sender returns."""
                if not self.via_rt:
                        return mailer.email(self.subject, self.message, to)
                return self.rt.email(self.subject, self.message, to)
107
class Recent(object):
        """Remember whether an action was taken within the last withintime seconds.

        Attributes:
                withintime   -- length of the window, in seconds.
                time         -- timestamp of the last setRecent()/unsetRecent().
                action_taken -- True while an action is considered taken.
        """

        def __init__(self, withintime):
                self.withintime = withintime

                # time and action_taken may already exist when __init__ runs
                # (PersistFlags.__new__ sets them and may return a stored
                # object), so only fill in what is missing.
                if not hasattr(self, 'time'):
                        self.time = time.time()- 7*24*60*60
                if not hasattr(self, 'action_taken'):
                        # Previously left undefined, which made isRecent() raise
                        # AttributeError while still inside the window.
                        self.action_taken = False

        def isRecent(self):
                """Return True when an action was taken and the window has not elapsed.

                Once the window has elapsed the action_taken marker is cleared.
                """
                now = time.time()
                window_end = self.time + self.withintime
                if window_end < now:
                        self.action_taken = False

                return bool(window_end > now and getattr(self, 'action_taken', False))

        def unsetRecent(self):
                """Clear the marker, restart the window and return True."""
                self.action_taken = False
                self.time = time.time()
                return True

        def setRecent(self):
                """Set the marker, restart the window and return True."""
                self.action_taken = True
                self.time = time.time()
                return True
138                 
class PersistFlags(Recent):
        """Recent tracker with named flags, kept in a store handled by the database module.

        The store is a dict loaded with database.dbLoad() and written with
        database.dbDump(); objects are kept under their id.  Constructing an
        instance whose id is already in the store returns the stored object.
        """

        def __new__(typ, id, *args, **kwargs):
                """Return the stored object for id, or create a new one.

                The optional keyword db names the store (default "persistflags").
                All other keyword arguments become attributes of a newly created
                object.
                """
                db = kwargs.pop('db', "persistflags")

                try:
                        pm = database.dbLoad(db)
                except Exception:
                        # Loading failed (presumably the store does not exist yet):
                        # start from an empty one.  Catching Exception instead of
                        # everything keeps an interrupt during the load from
                        # replacing the stored data with an empty dict.
                        database.dbDump(db, {})
                        pm = database.dbLoad(db)

                if id in pm:
                        obj = pm[id]
                else:
                        # object.__new__ must not be given the constructor
                        # arguments; __init__ still receives them.
                        obj = super(PersistFlags, typ).__new__(typ)
                        for key in kwargs:
                                setattr(obj, key, kwargs[key])
                        obj.time = time.time()
                        obj.action_taken = False

                obj.db = db
                return obj

        def __init__(self, id, withintime, **kwargs):
                self.id = id
                Recent.__init__(self, withintime)

        def save(self):
                """Write this object back into its store under self.id."""
                pm = database.dbLoad(self.db)
                pm[self.id] = self
                database.dbDump(self.db, pm)

        def resetFlag(self, name):
                """Set the attribute name to False."""
                setattr(self, name, False)

        def setFlag(self, name):
                """Set the attribute name to True."""
                setattr(self, name, True)

        def getFlag(self, name):
                """Return the attribute name; create it as False when it is missing."""
                try:
                        return getattr(self, name)
                except AttributeError:
                        setattr(self, name, False)
                        return False

        def resetRecentFlag(self, name):
                """Clear the flag and the recent marker."""
                self.resetFlag(name)
                self.unsetRecent()

        def setRecentFlag(self, name):
                """Set the flag and the recent marker."""
                self.setFlag(name)
                self.setRecent()

        def getRecentFlag(self, name):
                """Return isRecent() combined (with &) with the flag.

                When the flag is missing, or cannot be combined with a boolean,
                it is reset to False and False is returned.
                """
                # if recent and flag set -> true
                # else false
                try:
                        return self.isRecent() & getattr(self, name)
                except (AttributeError, TypeError):
                        setattr(self, name, False)
                        return False

        def checkattr(self, name):
                """Return True when the attribute name can be read, else False."""
                try:
                        getattr(self, name)
                except Exception:
                        return False
                return True
210                 
211
class PersistMessage(Message):
        """Message kept in a store so that repeated notices can be rate limited.

        The store is a dict loaded with database.dbLoad() and written with
        database.dbDump(); objects are kept under their id.  Constructing an
        instance whose id is already in the store returns the stored object.
        A new object gets an actiontracker with a three day window.
        """

        def __new__(typ, id, subject, message, via_rt=True, **kwargs):
                """Return the stored object for id, or create a new one.

                via_rt now defaults to True here as well, matching __init__, so
                the argument can be omitted.  Keywords: db names the store
                (default "persistmessages"); a ticket_id that is not None
                replaces the ticket id of the returned object.
                """
                db = kwargs.get('db', "persistmessages")

                try:
                        pm = database.dbLoad(db)
                except Exception:
                        # Loading failed (presumably the store does not exist yet):
                        # start from an empty one.  Catching Exception instead of
                        # everything keeps an interrupt during the load from
                        # replacing the stored data with an empty dict.
                        database.dbDump(db, {})
                        pm = database.dbLoad(db)

                if id in pm:
                        obj = pm[id]
                else:
                        # object.__new__ must not be given the constructor
                        # arguments; __init__ still receives them.
                        obj = super(PersistMessage, typ).__new__(typ)
                        obj.id = id
                        obj.actiontracker = Recent(3*60*60*24)
                        obj.ticket_id = None

                if kwargs.get('ticket_id') is not None:
                        obj.ticket_id = kwargs['ticket_id']

                obj.db = db
                return obj

        def __init__(self, id, subject, message, via_rt=True, **kwargs):
                # ticket_id was already set by __new__.
                print("initializing object: %s" % self.ticket_id)
                self.id = id
                Message.__init__(self, subject, message, via_rt, self.ticket_id)

        def reset(self):
                """Clear the recent marker so that the next send() goes out."""
                self.actiontracker.unsetRecent()

        def save(self):
                """Write this object back into its store under self.id."""
                pm = database.dbLoad(self.db)
                pm[self.id] = self
                database.dbDump(self.db, pm)

        def send(self, to):
                """Send the message unless one was already sent inside the window.

                After a successful send the returned ticket id is remembered, the
                window is restarted and the object is saved.
                """
                if self.actiontracker.isRecent():
                        # NOTE: at most one message per actiontracker window
                        # (three days for objects created by __new__), regardless.
                        print("Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24)))
                        return

                self.ticket_id = Message.send(self, to)
                self.actiontracker.setRecent()
                self.save()
263
class MonitorMessage(object):
        """Object kept in a store and paired with a PersistSitePenalty.

        The store is a dict loaded with database.dbLoad() and written with
        database.dbDump(); objects are kept under their id.  Constructing an
        instance whose id is already in the store returns the stored object.
        """

        def __new__(typ, id, *args, **kwargs):
                """Return the stored object for id, or create a new one.

                Keywords: db names the store (default "monitormessages");
                reset=True empties the store before it is read.
                """
                db = kwargs.get('db', "monitormessages")

                try:
                        if kwargs.get('reset') == True:
                                database.dbDump(db, {})
                        pm = database.dbLoad(db)
                except Exception:
                        # Loading failed (presumably the store does not exist yet):
                        # start from an empty one.  Catching Exception instead of
                        # everything keeps an interrupt during the load from
                        # replacing the stored data with an empty dict.
                        database.dbDump(db, {})
                        pm = database.dbLoad(db)

                if id in pm:
                        print("Using existing object")
                        obj = pm[id]
                else:
                        print("creating new object")
                        # super(object, typ) has nothing left to search and so could
                        # never build an instance; start the lookup from this
                        # class and pass object.__new__ only the type.
                        obj = super(MonitorMessage, typ).__new__(typ)
                        obj.id = id
                        obj.sp = PersistSitePenalty(id, 0)

                obj.db = db
                return obj

        def __init__(self, id, message, **kwargs):
                # **kwargs: Python hands __init__ the same keywords as __new__,
                # so db= and reset= must be accepted here too.
                pass
294                 
295
class SitePenalty(object):
        """Escalating list of penalties addressed by an index into penalty_map.

        Each penalty_map entry has a 'name' plus 'enable' and 'disable'
        callables that take a host.  The callables stay lambdas so that plc is
        only looked up when a penalty is actually applied.
        """

        penalty_map = [
                { 'name': 'noop',
                  'enable'   : lambda host: None,
                  'disable'  : lambda host: None },
                { 'name': 'nocreate',
                  'enable'   : lambda host: plc.removeSliceCreation(host),
                  'disable'  : lambda host: plc.enableSliceCreation(host) },
                { 'name': 'suspendslices',
                  'enable'   : lambda host: plc.suspendSlices(host),
                  'disable'  : lambda host: plc.enableSlices(host) },
        ]

        # Class-level default: without it a plain SitePenalty had no index and
        # increase()/decrease()/apply() raised AttributeError.  Instances that
        # set their own index are unaffected.
        index = 0

        def get_penalties(self):
                """Return the names of all entries in penalty_map."""
                # TODO: get penalties actually applied to a node from PLC DB.
                return [ n['name'] for n in SitePenalty.penalty_map ]

        def increase(self):
                """Move one step up, stopping at the last penalty; return True."""
                self.index = min(self.index + 1, len(SitePenalty.penalty_map)-1)
                return True

        def decrease(self):
                """Move one step down, stopping at 0; return True."""
                self.index = max(self.index - 1, 0)
                return True

        def apply(self, host):
                """Disable every penalty above index, then enable those up to index."""
                levels = SitePenalty.penalty_map

                # Walk downwards from the last penalty to the one just above index.
                for i in range(len(levels)-1,self.index,-1):
                        print("\tdisabling %s on %s" % (levels[i]['name'], host))
                        levels[i]['disable'](host)

                for i in range(0,self.index+1):
                        print("\tapplying %s on %s" % (levels[i]['name'], host))
                        levels[i]['enable'](host)

                return
333
334
335
class PersistSitePenalty(SitePenalty):
        """SitePenalty kept in a store handled by the database module.

        The store is a dict loaded with database.dbLoad() and written with
        database.dbDump(); objects are kept under their id.  Constructing an
        instance whose id is already in the store returns the stored object
        (its stored index is kept; the index argument is only used for new
        objects).
        """

        def __new__(typ, id, index, **kwargs):
                """Return the stored object for id, or create a new one.

                Keywords: db names the store (default "persistpenalties");
                reset=True empties the store before it is read.
                """
                db = kwargs.get('db', "persistpenalties")

                try:
                        if kwargs.get('reset') == True:
                                database.dbDump(db, {})
                        pm = database.dbLoad(db)
                except Exception:
                        # Loading failed (presumably the store does not exist yet):
                        # start from an empty one.  Catching Exception instead of
                        # everything keeps an interrupt during the load from
                        # replacing the stored data with an empty dict.
                        database.dbDump(db, {})
                        pm = database.dbLoad(db)

                if id in pm:
                        print("Using existing object")
                        obj = pm[id]
                else:
                        print("creating new object")
                        # object.__new__ must not be given the constructor
                        # arguments; __init__ still receives them.
                        obj = super(PersistSitePenalty, typ).__new__(typ)
                        obj.id = id
                        obj.index = index

                obj.db = db
                return obj

        def __init__(self, id, index, **kwargs):
                # index is deliberately not assigned here: __new__ set it for new
                # objects and stored objects keep the value they were saved with.
                self.id = id

        def save(self):
                """Write this object back into its store under self.id."""
                pm = database.dbLoad(self.db)
                pm[self.id] = self
                database.dbDump(self.db, pm)
371
372
class Target:
        """
                Each host has a target set of attributes.  Some may be set manually,
                or others are set globally for the preferred target.

                For instance:
                        All nodes in the Alpha or Beta group would have constraints like:
                                [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
        """
        def __init__(self, constraints):
                self.constraints = constraints

        def verify(self, data):
                """
                        self.constraints is a list of dicts.  The entries of one
                        dict are ANDed, the dicts themselves are ORed:
                        # [ {... : ...}==AND , ... , ... , ] == OR

                        An entry holds when its key is in data and its value is
                        contained (`in`) in data[key].  Every entry is checked,
                        even after one has already failed.
                """
                any_clause_holds = False
                for clause in self.constraints:
                        clause_holds = True
                        for key in clause:
                                if key not in data:
                                        print("missing key %s" % key)
                                        clause_holds = False
                                        continue
                                if clause[key] not in data[key]:
                                        clause_holds = False

                        if clause_holds:
                                any_clause_holds = True

                return any_clause_holds
406
class Record(object):
        """Diagnostic data for one host plus helpers to act on it.

        Attributes:
                hostname    -- the host this record is about.
                data        -- dict-like record; the methods read the keys
                               'stage', 'category', 'prev_category', 'state',
                               'info', 'args', 'message', 'email' and
                               'save-act-all'.
                plcdb_hn2lb -- the "plcdb_hn2lb" database, indexed by hostname.
                loginbase   -- plcdb_hn2lb[hostname].
        """

        def __init__(self, hostname, data):
                self.hostname = hostname
                self.data = data
                self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
                self.loginbase = self.plcdb_hn2lb[self.hostname]

        def stageIswaitforever(self):
                """Return True when 'waitforever' occurs in the stage."""
                return 'waitforever' in self.data['stage']

        def severity(self):
                """Return cmpCategoryVal(category, prev_category) for this record."""
                #print "SEVERITY: ", category, prev_category
                return cmpCategoryVal(self.data['category'], self.data['prev_category'])

        def improved(self):
                """Return True when severity() is positive."""
                return self.severity() > 0

        def end_record(self):
                """Delegate to node_end_record() for this host."""
                return node_end_record(self.hostname)

        def reset_stage(self):
                """Set the stage back to 'findbad' and return True."""
                self.data['stage'] = 'findbad'
                return True

        def getCategory(self):
                """Return the category in lower case."""
                return self.data['category'].lower()

        def getState(self):
                """Return the state in lower case."""
                return self.data['state'].lower()

        @classmethod
        def getDaysDown(cls, diag_record):
                """Return a day count derived from diag_record.

                With a usable comon uptime the result is the negated uptime,
                floor-divided into days.  Otherwise it is the number of days
                since plcnode last_contact, or -1 when the node has never
                contacted PLC.
                """
                uptime = diag_record['comonstats']['uptime']
                if uptime != "null" and uptime != "-1":
                        return - int(float(uptime)) // (60*60*24)
                #elif diag_record['comonstats']['sshstatus'] != "null":
                #       daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
                #elif diag_record['comonstats']['lastcotop'] != "null":
                #       daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)

                last_contact = diag_record['plcnode']['last_contact']
                if last_contact == None:
                        # the node has never been up, so give it a break
                        return -1
                return (time.time() - last_contact) // (60*60*24)

        @classmethod
        def getStrDaysDown(cls, diag_record):
                """Return a human readable up/down description for diag_record."""
                plcnode = diag_record['plcnode']
                last_contact = plcnode['last_contact']
                date_created = plcnode['date_created']
                uptime = diag_record['comonstats']['uptime']
                seconds_per_day = 60*60*24

                if uptime != "null" and uptime != "-1":
                        return "%d days up" % (int(float(uptime)) // seconds_per_day)

                if last_contact is None:
                        if date_created is None:
                                return "Never contacted PLC"
                        age = (time.time() - date_created) // seconds_per_day
                        return "Never contacted PLC, created %s days ago" % age

                down = (time.time() - last_contact) // seconds_per_day
                return "%s days down" % down

        #def getStrDaysDown(cls, diag_record):
        #       daysdown = cls.getDaysDown(diag_record)
        #       if daysdown > 0:
        #               return "%d days down"%daysdown
        #       elif daysdown == -1:
        #               return "Never online"
        #       else:
        #               return "%d days up"% -daysdown
        #getStrDaysDown = classmethod(getStrDaysDown)

        def takeAction(self, index=0):
                """Adjust, apply and save the stored penalty for this host."""
                pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
                stage = self.data['stage']
                if 'improvement' in stage or self.improved() or \
                        'monitor-end-record' in stage:
                        print("takeAction: decreasing penalty for %s"%self.hostname)
                        pp.decrease()
                        pp.decrease()
                else:
                        print("takeAction: increasing penalty for %s"%self.hostname)
                        pp.increase()
                # NOTE(review): this assignment replaces whatever level the
                # increase()/decrease() calls above produced with the index
                # argument -- confirm that this is intended.
                pp.index = index
                pp.apply(self.hostname)
                pp.save()

        def _format_diaginfo(self):
                """Format the 'info' triple as one line of the host list."""
                info = self.data['info']
                print("%s %s" % ("FORMAT : STAGE: ", self.data['stage']))
                if self.data['stage'] == 'monitor-end-record':
                        # An end record reports ALPHA as PROD.
                        if info[2] == "ALPHA":
                                info = (info[0], info[1], "PROD")
                        return "    %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
                return "    %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)

        def saveAction(self):
                """Return True only when data has 'save-act-all' equal to True."""
                return bool('save-act-all' in self.data and self.data['save-act-all'] == True)

        def getMessage(self, ticket_id=None):
                """Fill in the message arguments and build a PersistMessage.

                Returns None when the record has no message.
                """
                args = self.data['args']
                args['hostname'] = self.hostname
                args['loginbase'] = self.loginbase
                args['hostname_list'] = self._format_diaginfo()
                #print self.data['message']
                if not self.data['message']:
                        return None

                subject = self.data['message'][0] % self.data['args']
                body = self.data['message'][1] % self.data['args']
                return PersistMessage(self.hostname, subject, body,
                                      True, db='monitor_persistmessages',
                                      ticket_id=ticket_id)

        def getContacts(self):
                """Return the list of addresses selected by the 'email' role mask."""
                roles = self.data['email']

                # Some config combinations redirect everything to the admin.
                if not config.mail and not config.debug and config.bcc:
                        roles = ADMIN
                if config.mail and config.debug:
                        roles = ADMIN

                # build targets
                contacts = []
                if ADMIN & roles:
                        contacts.append(config.email)
                if TECH & roles:
                        contacts.append(TECHEMAIL % self.loginbase)
                if PI & roles:
                        contacts.append(PIEMAIL % self.loginbase)
                if USER & roles:
                        slices = plc.slices(self.loginbase)
                        if len(slices) >= 1:
                                for slice in slices:
                                        contacts.append(SLICEMAIL % slice)
                                print("SLIC: %20s : %d slices" % (self.loginbase, len(slices)))
                        else:
                                print("SLIC: %20s : 0 slices" % self.loginbase)

                return contacts
572
573
class NodeRecord:
        """Partly implemented per-node record with a target and an optional ticket.

        NOTE(review): most methods read self.data, which nothing in this class
        assigns (the loading code in __init__ is commented out); presumably
        callers set it -- confirm.
        """

        def __init__(self, hostname, target):
                self.hostname = hostname
                self.ticket = None
                self.target = target
                #if hostname in fb['nodes']:
                #       self.data = fb['nodes'][hostname]['values']
                #else:
                #       raise Exception("Hostname not in scan database")

        def stageIswaitforever(self):
                """Return True when 'waitforever' occurs in the stage."""
                if 'waitforever' in self.data['stage']:
                        return True
                else:
                        return False

        def severity(self):
                """Return cmpCategoryVal(category, prev_category) for this record."""
                category = self.data['category']
                prev_category = self.data['prev_category']
                print("%s %s %s" % ("IMPROVED: ", category, prev_category))
                val = cmpCategoryVal(category, prev_category)
                return val

        def improved(self):
                """Return True when severity() is positive."""
                return self.severity() > 0

        def end_record(self):
                """Delegate to node_end_record() for this host."""
                return node_end_record(self.hostname)

        def reset_stage(self):
                """Set the stage back to 'findbad' and return True."""
                self.data['stage'] = 'findbad'
                return True

        def open_tickets(self):
                """Return 1 when there is a ticket whose status is 'open', else 0."""
                if self.ticket and self.ticket.status['status'] == 'open':
                        return 1
                return 0

        def setIntrospect(self):
                pass

        def email_notice(self):
                """Send the message for the current condition.

                Returns True after sending and False when there is no message to
                send (the message hook in this class returns None, which used to
                end in an AttributeError on .send).
                """
                message = self._get_message_for_condition()
                if message is None:
                        return False
                message.send(self._get_contacts_for_condition())
                return True

        def close_ticket(self):
                """Close the ticket, if there is one."""
                if self.ticket:
                        self.ticket.closeTicket()

        def exempt_from_penalties(self):
                """Return True when the host is in the "l_blacklist" database."""
                bl = database.dbLoad("l_blacklist")
                return self.hostname in bl

        def penalties(self):
                # Placeholder: no penalties are reported.
                return []

        def escellate_penalty(self):
                # Placeholder: always reports True.
                return True

        def reduce_penalty(self):
                # Placeholder: always reports True.
                return True


        def atTarget(self):
                """Return the target's verdict on this record's data."""
                return self.target.verify(self.data)

        def _get_condition(self):
                return self.data['category'].lower()

        def _get_stage(self):
                """Unfinished: computes the age of the record and returns None.

                The stage names noted by the original author are:
                        improvement
                        firstnotice_noop
                        secondnotice_noslicecreation
                        thirdnotice_disableslices
                """
                # The original used an undefined name (current_time) here and
                # raised NameError; use the current time instead.
                # TODO: map delta onto one of the stages above.
                delta = time.time() - self.data['time']

        def _get_message_for_condition(self):
                # Placeholder: no message is produced.
                pass

        def _get_contacts_for_condition(self):
                # Placeholder: no contacts are produced.
                pass
652
class Action(MonRecord):
        """MonRecord that also remembers the host it belongs to."""

        def __init__(self, host, data):
                self.host = host
                MonRecord.__init__(self, data)

        def deltaDays(self, delta):
                """Shift the 'time' entry of the instance dict by delta days."""
                # The instance __dict__ is read and written directly, as before.
                fields = self.__dict__
                shifted = datetime.fromtimestamp(fields['time']) + timedelta(delta)
                fields['time'] = time.mktime(shifted.timetuple())
663                 
def node_end_record(node):
        """Close out the monitoring record of node.

        Returns False when node has no entries in the "act_all" database or no
        entry in "monitor_persistmessages".  Otherwise the persistent message
        is deleted, a 'monitor-end-record' copy of the newest action record is
        inserted at the front of act_all[node], and True is returned.
        """
        act_all = database.dbLoad("act_all")
        if node not in act_all or len(act_all[node]) == 0:
                return False

        pm = database.dbLoad("monitor_persistmessages")
        if node not in pm:
                return False

        print("deleting node record")
        del pm[node]
        database.dbDump("monitor_persistmessages", pm)

        a = Action(node, act_all[node][0])
        for field in ('rt', 'found_rt_ticket', 'second-mail-at-oneweek',
                      'second-mail-at-twoweeks', 'first-found'):
                a.delField(field)

        rec = a.get()
        rec['action'] = ["close_rt"]
        rec['category'] = "ALPHA"       # assume that it's up...
        rec['stage'] = "monitor-end-record"
        rec['ticket_id'] = None
        # The record is dated one week in the past.
        rec['time'] = time.time() - 7*60*60*24
        act_all[node].insert(0,rec)
        database.dbDump("act_all", act_all)
        return True
699
if __name__ == "__main__":
        # Running the module directly only confirms that it loads.
        print("loaded")

        # Manual experiments kept for reference:
        #r = RT()
        #r.email("test", "body of test message", ['database@cs.princeton.edu'])
        #from emailTxt import mailtxt
        #database.dbDump("persistmessages", {});
        #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah -  days down\n'}
        #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
        #m.send(['soltesz@cs.utk.edu'])
        #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
        # TRICK timer to thinking some time has passed.
        #m.actiontracker.time = time.time() - 6*60*60*24
        #m.send(['soltesz@cs.utk.edu'])