pcucontrol owned in %files section
[monitor.git] / unified_model.py
1 #!/usr/bin/python
2
3 from monitor import database
4
5 from monitor.wrapper import plc, plccache
6 from monitor.wrapper import mailer
7 import time
8
9 from model import *
10 from monitor.const import *
11 from monitor import util
12 from monitor import config
13
def gethostlist(hostlist_file):
        """Return the list read from hostlist_file.

        Thin wrapper that delegates to util.file.getListFromFile().
        """
        hosts = util.file.getListFromFile(hostlist_file)
        return hosts
16
def array_to_priority_map(array):
        """Map each entry of array to its position in the array.

        The position serves as the entry's priority and is meant to be
        consumed by cmpValMap().  When an entry occurs more than once, its
        last position wins.
        """
        return dict((entry, position) for position, entry in enumerate(array))
27
def cmpValMap(v1, v2, map):
        """Compare v1 and v2 by the priorities that map assigns to them.

        Returns 1 when v1's priority is lower than v2's, -1 when it is higher
        and 0 otherwise.  Raises Exception when either value is not a key of
        map.
        """
        if v1 not in map or v2 not in map:
                raise Exception("No index %s or %s in map" % (v1, v2))

        first = map[v1]
        second = map[v2]
        if first < second:
                return 1
        if first > second:
                return -1
        return 0
37
def cmpCategoryVal(v1, v2):
        """Compare two category names by their position in a fixed ordering.

        'ALPHA' is replaced by "PROD" in both arguments before comparing.
        Returns the result of cmpValMap() for the two normalized values.
        """
        # Terrible hack to manage migration to no more 'ALPHA' states.
        normalized = []
        for value in (v1, v2):
                if value == 'ALPHA':
                        value = "PROD"
                normalized.append(value)

        # Previous ordering, kept for reference:
        #   [ None, 'PROD', 'ALPHA', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ]
        ordering = [ None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ]
        priorities = array_to_priority_map(ordering)
        return cmpValMap(normalized[0], normalized[1], priorities)
45
46
class PCU:
        """Stub PCU record for a host.

        Every operation is a placeholder: the query methods report True and
        setValidMapping() does nothing.
        """

        def __init__(self, hostname):
                # Host this PCU entry belongs to.
                self.hostname = hostname

        def reboot(self):
                """Placeholder; always returns True."""
                return True

        def available(self):
                """Placeholder; always returns True."""
                return True

        def previous_attempt(self):
                """Placeholder; always returns True."""
                return True

        def setValidMapping(self):
                """Placeholder; does nothing and returns None."""
                return None
59
class Penalty:
        """Unimplemented placeholder; the constructor ignores its arguments."""

        def __init__(self, key, valuepattern, action):
                # Nothing is stored on the instance yet.
                return None
63
class PenaltyMap:
        """Unimplemented placeholder for chaining penalties together."""

        def __init__(self):
                # No state yet.
                return None

        # connect one penalty to another, in a FSM diagram.  After one
        #       condition/penalty is applied, move to the next phase.
70
71
class RT(object):
        """Handle on a single RT ticket, driven through the mailer wrapper.

        Attributes:
                ticket_id: id of the ticket, or None when there is no ticket yet.
                status: last value returned by mailer.getTicketStatus(), or None
                        when nothing has been fetched.
        """

        def __init__(self, ticket_id = None):
                self.ticket_id = ticket_id
                # Always define status, so getTicketStatus() is safe on an object
                # that was created without a ticket.
                self.status = None
                if self.ticket_id:
                        self.status = mailer.getTicketStatus(self.ticket_id)
                        print("getting ticket status %s" % (self.status,))

        def setTicketStatus(self, status):
                """Set the ticket's status, re-read it, and return True."""
                mailer.setTicketStatus(self.ticket_id, status)
                self.status = mailer.getTicketStatus(self.ticket_id)
                return True

        def getTicketStatus(self):
                """Return the cached status, fetching it first when it is empty."""
                # getattr() also covers instances that never had `status` assigned
                # (presumably possible for objects restored from a persisted store
                # -- TODO confirm).
                if not getattr(self, 'status', None):
                        self.status = mailer.getTicketStatus(self.ticket_id)
                return self.status

        def closeTicket(self):
                """Close the ticket through RT with a fixed explanatory comment."""
                mailer.closeTicketViaRT(self.ticket_id, "Ticket CLOSED automatically by SiteAssist.")

        def email(self, subject, body, to):
                """Send a message via RT and remember the ticket id it returns."""
                self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
                return self.ticket_id
96
class Message(object):
        """A subject/body pair that is sent either through RT or by plain mail."""

        def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
                # Additional keyword arguments are accepted and ignored.
                self.subject = subject
                self.message = message
                self.via_rt = via_rt
                self.rt = RT(ticket_id)

        def send(self, to):
                """Deliver the message to `to` and return the sender's result.

                With via_rt set the message goes through self.rt.email();
                otherwise it is handed to mailer.email().
                """
                if not self.via_rt:
                        return mailer.email(self.subject, self.message, to)
                return self.rt.email(self.subject, self.message, to)
109
class Recent(object):
        """Tracks whether an action was taken within the last `withintime` seconds.

        Attributes:
                withintime: length of the window, in seconds.
                time: time.time() value recorded by the last setRecent() or
                        unsetRecent() call.
                action_taken: True after setRecent(), until the window is seen to
                        have expired or unsetRecent() is called.
        """

        def __init__(self, withintime):
                self.withintime = withintime

                # __init__ may run on an object that already carries state (a
                # subclass can hand back a stored object from __new__), so only
                # fill in the attributes that are missing.
                if not hasattr(self, 'time'):
                        # Backdate a fresh tracker by one week.
                        self.time = time.time() - 7*24*60*60
                if not hasattr(self, 'action_taken'):
                        # Without this default isRecent() raised AttributeError
                        # whenever withintime exceeded the one-week backdating.
                        self.action_taken = False

        def isRecent(self):
                """Return True when an action was recorded inside the window.

                As a side effect, action_taken is cleared once the window has
                passed.
                """
                now = time.time()
                deadline = self.time + self.withintime
                if deadline < now:
                        self.action_taken = False
                return bool(deadline > now and self.action_taken)

        def unsetRecent(self):
                """Clear the action marker, restart the clock and return True."""
                self.action_taken = False
                self.time = time.time()
                return True

        def setRecent(self):
                """Record that the action happened now and return True."""
                self.action_taken = True
                self.time = time.time()
                return True
140                 
class PersistFlags(Recent):
        """Recent-tracker with named boolean flags, stored under an id.

        Construction looks the id up in the store named by the `db` keyword
        (default "persistflags"); a stored object is reused, otherwise a new one
        is created.  Changes are only written back by save().
        """

        def __new__(typ, id, *args, **kwargs):
                # `db` selects the store and is not treated as a flag attribute.
                db = kwargs.pop('db', "persistflags")

                try:
                        pm = database.dbLoad(db)
                except Exception:
                        # Store could not be loaded: create an empty one and retry.
                        database.dbDump(db, {})
                        pm = database.dbLoad(db)

                if id in pm:
                        obj = pm[id]
                else:
                        # object.__new__ must not receive the constructor arguments
                        # (deprecated in Python 2.6, a TypeError in Python 3).
                        obj = super(PersistFlags, typ).__new__(typ)
                        # Remaining keyword arguments become initial attributes.
                        for key in kwargs:
                                setattr(obj, key, kwargs[key])
                        obj.time = time.time()
                        obj.action_taken = False

                obj.db = db
                return obj

        def __init__(self, id, withintime, **kwargs):
                self.id = id
                Recent.__init__(self, withintime)

        def save(self):
                """Write this object back into its store under self.id."""
                pm = database.dbLoad(self.db)
                pm[self.id] = self
                database.dbDump(self.db, pm)

        def resetFlag(self, name):
                """Set the flag `name` to False."""
                setattr(self, name, False)

        def setFlag(self, name):
                """Set the flag `name` to True."""
                setattr(self, name, True)

        def getFlag(self, name):
                """Return the flag `name`; an unknown flag is created as False."""
                try:
                        return getattr(self, name)
                except AttributeError:
                        setattr(self, name, False)
                        return False

        def resetRecentFlag(self, name):
                """Clear the flag `name` and the recent-action marker."""
                self.resetFlag(name)
                self.unsetRecent()

        def setRecentFlag(self, name):
                """Set the flag `name` and mark the action as taken now."""
                self.setFlag(name)
                self.setRecent()

        def getRecentFlag(self, name):
                """Return True only when the action is recent and the flag is set.

                An unknown flag is created as False and False is returned.
                """
                try:
                        recent = self.isRecent()
                        flag = getattr(self, name)
                except AttributeError:
                        setattr(self, name, False)
                        return False
                return bool(recent and flag)

        def checkattr(self, name):
                """Return True when the attribute `name` exists on this object."""
                return hasattr(self, name)
212                 
213
class PersistMessage(Message):
        """Message that is stored under an id and rate-limited between sends.

        Construction looks the id up in the store named by the `db` keyword
        (default "persistmessages"); a stored object is reused, otherwise a new
        one is created with a one-day action tracker and no ticket.
        """

        def __new__(typ, id, subject, message, via_rt=True, **kwargs):
                db = kwargs.get('db', "persistmessages")

                try:
                        pm = database.dbLoad(db)
                except Exception:
                        # Store could not be loaded: create an empty one and retry.
                        database.dbDump(db, {})
                        pm = database.dbLoad(db)

                if id in pm:
                        obj = pm[id]
                else:
                        # object.__new__ must not receive the constructor arguments
                        # (deprecated in Python 2.6, a TypeError in Python 3).
                        obj = super(PersistMessage, typ).__new__(typ)
                        obj.id = id
                        obj.actiontracker = Recent(1*60*60*24)
                        obj.ticket_id = None

                # An explicitly supplied ticket id replaces the stored one.
                if kwargs.get('ticket_id') is not None:
                        obj.ticket_id = kwargs['ticket_id']

                obj.db = db
                return obj

        def __init__(self, id, subject, message, via_rt=True, **kwargs):
                # self.ticket_id was already assigned in __new__.
                print("initializing object: %s" % (self.ticket_id,))
                self.id = id
                Message.__init__(self, subject, message, via_rt, self.ticket_id)

        def reset(self):
                """Forget the last send so the next send() goes out immediately."""
                self.actiontracker.unsetRecent()

        def save(self):
                """Write this object back into its store under self.id."""
                pm = database.dbLoad(self.db)
                pm[self.id] = self
                database.dbDump(self.db, pm)

        def send(self, to):
                """Send the message unless one already went out inside the window.

                After a successful send the returned ticket id is stored, the
                tracker is marked and the object is saved.  Returns None.
                """
                if not self.actiontracker.isRecent():
                        self.ticket_id = Message.send(self, to)
                        self.actiontracker.setRecent()
                        self.save()
                else:
                        # NOTE: at most one message per actiontracker window, regardless.
                        # NOTE: can cause thank-you messages to be lost, for instance when node comes back online within window.
                        print("Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24)))
266
class MonitorMessage(object):
        """Object stored under an id that owns a PersistSitePenalty (`sp`).

        Construction looks the id up in the store named by the `db` keyword
        (default "monitormessages"); a stored object is reused, otherwise a new
        one is created.  Passing reset=True empties the store first.
        """

        def __new__(typ, id, *args, **kwargs):
                db = kwargs.get('db', "monitormessages")

                try:
                        if kwargs.get('reset') == True:
                                database.dbDump(db, {})
                        pm = database.dbLoad(db)
                except Exception:
                        # Store could not be loaded: create an empty one and retry.
                        database.dbDump(db, {})
                        pm = database.dbLoad(db)

                if id in pm:
                        print("Using existing object")
                        obj = pm[id]
                else:
                        print("creating new object")
                        # super(object, typ) skips past `object` in the MRO and so
                        # never reaches object.__new__; start the lookup from this
                        # class and pass no constructor arguments.
                        obj = super(MonitorMessage, typ).__new__(typ)
                        obj.id = id
                        obj.sp = PersistSitePenalty(id, 0)

                obj.db = db
                return obj

        def __init__(self, id, message, **kwargs):
                # All state is set up in __new__.  **kwargs is accepted so that
                # the `db` / `reset` keywords understood by __new__ do not raise
                # a TypeError here.
                pass
297                 
298
class SitePenalty(object):
        """Escalating list of penalties that can be applied to a host.

        penalty_map is ordered from mildest to harshest; `index` selects the
        harshest level currently in force.  Each entry carries a name plus
        'enable' and 'disable' callables that take the host.
        """

        penalty_map = [
                { 'name': 'noop',
                  'enable'   : lambda host: None,
                  'disable'  : lambda host: None },
                { 'name': 'nocreate',
                  'enable'   : lambda host: plc.removeSliceCreation(host),
                  'disable'  : lambda host: plc.enableSliceCreation(host) },
                { 'name': 'suspendslices',
                  'enable'   : lambda host: plc.suspendSlices(host),
                  'disable'  : lambda host: plc.enableSlices(host) },
        ]

        # Default level, so a plain SitePenalty() works before anything assigns
        # an instance-level index.
        index = 0

        def get_penalties(self):
                """Return the names of all known penalties, mildest first."""
                # TODO: get penalties actually applied to a node from PLC DB.
                return [ n['name'] for n in SitePenalty.penalty_map ]

        def increase(self):
                """Move one level up, stopping at the harshest; returns True."""
                self.index = min(self.index + 1, len(SitePenalty.penalty_map) - 1)
                return True

        def decrease(self):
                """Move one level down, stopping at 0; returns True."""
                self.index = max(self.index - 1, 0)
                return True

        def apply(self, host):
                """Bring `host` in line with the current level.

                Levels above self.index are disabled from the harshest down, then
                levels 0..self.index are enabled in ascending order.
                """
                levels = SitePenalty.penalty_map

                for i in range(len(levels)-1, self.index, -1):
                        print("\tdisabling %s on %s" % (levels[i]['name'], host))
                        levels[i]['disable'](host)

                for i in range(0, self.index+1):
                        print("\tapplying %s on %s" % (levels[i]['name'], host))
                        levels[i]['enable'](host)

                return
336
337
338
class PersistSitePenalty(SitePenalty):
        """SitePenalty whose level is stored under an id.

        Construction looks the id up in the store named by the `db` keyword
        (default "persistpenalties"); a stored object is reused, otherwise a new
        one is created at the given index.  Passing reset=True empties the store
        first.  Changes are only written back by save().
        """

        def __new__(typ, id, index, **kwargs):
                db = kwargs.get('db', "persistpenalties")

                try:
                        if kwargs.get('reset') == True:
                                database.dbDump(db, {})
                        pm = database.dbLoad(db)
                except Exception:
                        # Store could not be loaded: create an empty one and retry.
                        database.dbDump(db, {})
                        pm = database.dbLoad(db)

                if id in pm:
                        print("Using existing object")
                        obj = pm[id]
                else:
                        print("creating new object")
                        # object.__new__ must not receive the constructor arguments
                        # (deprecated in Python 2.6, a TypeError in Python 3).
                        obj = super(PersistSitePenalty, typ).__new__(typ)
                        obj.id = id
                        obj.index = index

                obj.db = db
                return obj

        def __init__(self, id, index, **kwargs):
                # index is only applied in __new__, and only to new objects; a
                # stored object keeps the level it was saved with.
                self.id = id

        def save(self):
                """Write this object back into its store under self.id."""
                pm = database.dbLoad(self.db)
                pm[self.id] = self
                database.dbDump(self.db, pm)
374
375
class Target:
        """
                Each host has a target set of attributes.  Some may be set manually,
                or others are set globally for the preferred target.

                For instance:
                        All nodes in the Alpha or Beta group would have constraints like:
                                [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
        """
        def __init__(self, constraints):
                self.constraints = constraints

        def verify(self, data):
                """
                        Return True when data satisfies at least one constraint.

                        self.constraints is a list of dicts:
                        # [ {... : ...}==AND , ... , ... , ] == OR
                        A dict is satisfied when, for every key, the key is present
                        in data and the constraint value is contained in data[key]
                        (`in` test).  Every key of every constraint is examined, so
                        each missing key is reported.  An empty constraint list
                        yields False.
                """
                any_satisfied = False
                for con in self.constraints:
                        all_satisfied = True
                        for key in con:
                                if key in data:
                                        if con[key] not in data[key]:
                                                all_satisfied = False
                                else:
                                        print("missing key %s" % key)
                                        all_satisfied = False

                        if all_satisfied:
                                any_satisfied = True

                return any_satisfied
409
class Record(object):
        """A host together with the diagnostic data collected for it.

        Attributes:
                hostname: name of the host.
                data: mapping with the diagnosis; the methods below read keys such
                        as 'stage', 'category', 'prev_category', 'state', 'info',
                        'message', 'args', 'email' and 'last_action_record'.
                plcdb_hn2lb: plccache.plcdb_hn2lb, indexed by hostname.
                loginbase: plcdb_hn2lb entry for this host.
        """

        def __init__(self, hostname, data):
                self.hostname = hostname
                self.data = data
                self.plcdb_hn2lb = plccache.plcdb_hn2lb
                self.loginbase = self.plcdb_hn2lb[self.hostname]

        def stageIswaitforever(self):
                """Return True when 'waitforever' occurs in the current stage."""
                return 'waitforever' in self.data['stage']

        def severity(self):
                """Compare the current category with the previous one.

                Returns the result of cmpCategoryVal(category, prev_category).
                """
                category = self.data['category']
                prev_category = self.data['prev_category']
                return cmpCategoryVal(category, prev_category)

        def improved(self):
                """Return True when severity() is positive."""
                return self.severity() > 0

        def end_record(self):
                """Close out this host's record via node_end_record()."""
                return node_end_record(self.hostname)

        def reset_stage(self):
                """Set the stage back to 'findbad' and return True."""
                self.data['stage'] = 'findbad'
                return True

        def getCategory(self):
                """Return the category in lower case."""
                return self.data['category'].lower()

        def getState(self):
                """Return the state in lower case."""
                return self.data['state'].lower()

        @classmethod
        def getDaysDown(cls, diag_record):
                """Return the number of days the node has been down.

                With a usable comonstats uptime the result is negative.  Otherwise
                it is the whole days since plcnode last_contact, or -1 when the
                node has never made contact.
                """
                uptime = diag_record['comonstats']['uptime']
                if uptime != "null" and uptime != "-1":
                        # NOTE(review): unary minus binds tighter than //, so this
                        # floors toward -infinity (100 seconds of uptime gives -1,
                        # not 0).  Left unchanged because callers may rely on it.
                        return - int(float(uptime)) // (60*60*24)

                # (Fallbacks on comonstats 'sshstatus' / 'lastcotop' were disabled
                # in the original code.)
                last_contact = diag_record['plcnode']['last_contact']
                if last_contact is None:
                        # the node has never been up, so give it a break
                        return -1
                return (time.time() - last_contact) // (60*60*24)

        @classmethod
        def getStrDaysDown(cls, diag_record):
                """Return a human-readable up/down duration for the node."""
                last_contact = diag_record['plcnode']['last_contact']
                date_created = diag_record['plcnode']['date_created']
                uptime = diag_record['comonstats']['uptime']

                if uptime != "null" and uptime != "-1":
                        return "%d days up" % (int(float(uptime)) // (60*60*24))

                # The day counts below come from float floor division; %d keeps
                # them from being rendered as e.g. "3.0".
                if last_contact is None:
                        if date_created is None:
                                return "Never contacted PLC"
                        days = (time.time() - date_created) // (60*60*24)
                        return "Never contacted PLC, created %d days ago" % days

                days = (time.time() - last_contact) // (60*60*24)
                return "%d days down" % days

        def getSendEmailFlag(self):
                """Return True when an email should be sent for this record.

                False when config.mail is off, or when the record has an RT entry
                whose Status contains "open" and whose Created value is less than
                thirty days old.
                """
                if not config.mail:
                        return False

                # resend if open & created longer than 30 days ago.
                if 'rt' in self.data:
                        ticket = self.data['rt']
                        if 'Status' in ticket and "open" in ticket['Status'] and \
                                ticket['Created'] > int(time.time() - 60*60*24*30):
                                # created more recently than thirty days ago
                                return False

                return True

        def getMostRecentStage(self):
                """Return the stage of the last action record."""
                return self.data['last_action_record'].stage

        def getMostRecentTime(self):
                """Return date_action_taken of the last action record."""
                return self.data['last_action_record'].date_action_taken

        def takeAction(self, index=0):
                """Apply the penalty level `index` to this host and save it."""
                pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
                if 'improvement' in self.data['stage'] or self.improved() or \
                        'monitor-end-record' in self.data['stage']:
                        print("takeAction: decreasing penalty for %s" % self.hostname)
                        pp.decrease()
                        pp.decrease()
                else:
                        print("takeAction: increasing penalty for %s" % self.hostname)
                        pp.increase()
                # NOTE(review): this overrides the adjustment made just above, so
                # the level that is applied is always the caller's `index`.
                pp.index = index
                pp.apply(self.hostname)
                pp.save()

        def _format_diaginfo(self):
                """Format data['info'] as one line for the message's host list."""
                info = self.data['info']
                print("FORMAT : STAGE:  %s" % (self.data['stage'],))
                if self.data['stage'] == 'monitor-end-record':
                        if info[2] == "ALPHA": info = (info[0], info[1], "PROD")
                        hlist = "    %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
                else:
                        hlist = "    %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
                return hlist

        def saveAction(self):
                """Return True when data['save_act_all'] is present and equals True."""
                # Equality (not identity) is kept on purpose to match old behavior.
                return 'save_act_all' in self.data and self.data['save_act_all'] == True

        def getMessage(self, ticket_id=None):
                """Build the PersistMessage for this record, or None.

                Fills data['args'] with hostname, loginbase and hostname_list,
                then formats the subject (message[0]) and body (message[1]) with
                those args.  For the "improvement" stage the message's send
                window is reset.  Returns None when data['message'] is empty.
                """
                self.data['args']['hostname'] = self.hostname
                self.data['args']['loginbase'] = self.loginbase
                self.data['args']['hostname_list'] = self._format_diaginfo()
                if not self.data['message']:
                        return None

                message = PersistMessage(self.hostname,
                                                                 self.data['message'][0] % self.data['args'],
                                                                 self.data['message'][1] % self.data['args'],
                                                                 True, db='monitor_persistmessages',
                                                                 ticket_id=ticket_id)
                if self.data['stage'] == "improvement":
                        message.reset()
                return message

        def getContacts(self):
                """Return the list of email addresses to notify for this record.

                data['email'] is combined with the ADMIN / TECH / PI / USER
                constants using `&` (presumably bit flags from monitor.const --
                TODO confirm).  Some config.mail / config.debug / config.bcc
                combinations restrict the recipients to ADMIN.
                """
                roles = self.data['email']

                if not config.mail and not config.debug and config.bcc:
                        roles = ADMIN
                if config.mail and config.debug:
                        roles = ADMIN

                # build targets
                contacts = []
                if ADMIN & roles:
                        contacts += [config.email]
                if TECH & roles:
                        #contacts += [TECHEMAIL % self.loginbase]
                        contacts += plc.getTechEmails(self.loginbase)
                if PI & roles:
                        #contacts += [PIEMAIL % self.loginbase]
                        # NOTE(review): same call as the USER branch below; confirm
                        # whether PI addresses should come from a different lookup.
                        contacts += plc.getSliceUserEmails(self.loginbase)
                if USER & roles:
                        contacts += plc.getSliceUserEmails(self.loginbase)
                        slices = plc.slices(self.loginbase)
                        if len(slices) >= 1:
                                #for slice in slices:
                                #       contacts += [SLICEMAIL % slice]
                                print("SLIC: %20s : %d slices" % (self.loginbase, len(slices)))
                        else:
                                print("SLIC: %20s : 0 slices" % self.loginbase)

                return contacts
592
593
class NodeRecord:
        """Pairs a hostname with its target; starts out with no ticket."""

        def __init__(self, hostname, target):
                self.hostname, self.target = hostname, target
                # No ticket is associated until one is assigned.
                self.ticket = None
599
class Action(MonRecord):
        """A MonRecord that also remembers which host it belongs to."""

        def __init__(self, host, data):
                self.host = host
                MonRecord.__init__(self, data)

        def deltaDays(self, delta):
                """Shift the record's 'time' entry by `delta`.

                `datetime` and `timedelta` are not imported by name in this file;
                presumably they arrive through one of the star imports and
                `delta` is a number of days -- TODO confirm.
                """
                moment = datetime.fromtimestamp(self.__dict__['time'])
                moment = moment + timedelta(delta)
                self.__dict__['time'] = time.mktime(moment.timetuple())
610                 
def node_end_record(node):
        """Close out the action history of `node`.

        Returns False without changing anything when the node has no entries in
        the "act_all" store or no entry in "monitor_persistmessages".  Otherwise
        the node's persisted message is deleted, a copy of its newest action
        (minus a few fields) is turned into a "monitor-end-record" entry dated
        one week in the past and put at the front of its history, and True is
        returned.
        """
        act_all = database.dbLoad("act_all")
        if node not in act_all or len(act_all[node]) == 0:
                return False

        pm = database.dbLoad("monitor_persistmessages")
        if node not in pm:
                return False

        print("deleting node record")
        del pm[node]
        database.dbDump("monitor_persistmessages", pm)

        a = Action(node, act_all[node][0])
        stale_fields = ('rt', 'found_rt_ticket', 'second-mail-at-oneweek',
                                        'second-mail-at-twoweeks', 'first-found')
        for field in stale_fields:
                a.delField(field)

        rec = a.get()
        rec['action'] = ["close_rt"]
        rec['category'] = "ALPHA"       # assume that it's up...
        rec['stage'] = "monitor-end-record"
        rec['ticket_id'] = None
        rec['time'] = time.time() - 7*60*60*24
        act_all[node].insert(0,rec)
        database.dbDump("act_all", act_all)
        return True
646
if __name__ == "__main__":
        # Running the module directly only confirms that it imports cleanly.
        # The commented lines below are manual experiments kept for reference.
        #r = RT()
        #r.email("test", "body of test message", ['database@cs.princeton.edu'])
        #from emailTxt import mailtxt
        print("loaded")
        #database.dbDump("persistmessages", {});
        #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah -  days down\n'}
        #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
        #m.send(['soltesz@cs.utk.edu'])
        #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
        # TRICK timer to thinking some time has passed.
        #m.actiontracker.time = time.time() - 6*60*60*24
        #m.send(['soltesz@cs.utk.edu'])