Take PCUs into account. Need to test.
[monitor.git] / policy.py
1 #
2 # Copyright (c) 2004  The Trustees of Princeton University (Trustees).
3 #
4 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
5 #
6 # $Id: policy.py,v 1.17 2007/08/29 17:26:50 soltesz Exp $
7 #
8 # Policy Engine.
9
10 #from monitor import *
11 from threading import *
12 import time
13 import logging
14 import mailer
15 import emailTxt
16 import pickle
17 import Queue
18 import plc
19 import sys
20 import reboot
21 import soltesz
22 import string
23 from www.printbadnodes import cmpCategoryVal
24 from config import config
# Executed at import time: announces that this module is being loaded.
print "policy"
# Rebinds the name ``config`` from the imported callable to the object it
# returns; from here on ``config`` is that instance.
config = config()

# Path of a data file.  NOTE(review): not referenced in the visible part of
# this module -- confirm where it is used.
DAT="./monitor.dat"

# Logger shared under the name "monitor".
logger = logging.getLogger("monitor")

# Time to enforce policy
# NOTE(review): presumably a sleep interval in seconds (7200 s = 2 h) -- confirm.
POLSLEEP = 7200

# Where to email the summary
SUMTO = "soltesz@cs.princeton.edu"
# Address templates; each %s is filled in by the caller.
# NOTE(review): presumably a site loginbase / slice name -- confirm.
TECHEMAIL="tech-%s@sites.planet-lab.org"
PIEMAIL="pi-%s@sites.planet-lab.org"
SLICEMAIL="%s@slices.planet-lab.org"
PLCEMAIL="support@planet-lab.org"

# Thresholds.  The values are stored in SECONDS: each one is a number of
# days multiplied by SPERDAY.
SPERMIN = 60
SPERHOUR = 60*60
SPERDAY = 86400
PITHRESH = 7 * SPERDAY
SLICETHRESH = 7 * SPERDAY
# Days before attempting rins again
RINSTHRESH = 5 * SPERDAY

# Days before calling the node dead.
DEADTHRESH = 30 * SPERDAY
# Minimum number of nodes up before squeezing
MINUP = 2

# Recipient classes.  The values are distinct powers of two.
# NOTE(review): presumably meant to be OR-ed together as bit flags -- confirm.
TECH=1
PI=2
USER=4
ADMIN=8
60
61 # IF:
62 #  no SSH, down.
63 #  bad disk, down
64 #  DNS, kinda down (sick)
65 #  clock, kinda down (sick)
66 #  Full disk, going to be down
67
68 # Actions:
69 #  Email
70 #  suspend slice creation
71 #  kill slices
def array_to_priority_map(array):
        """Map each entry of ``array`` to its position in the sequence.

        Every entry gets a priority equal to its index, which is useful for
        subsequent use in the cmpMap() function.  When an entry occurs more
        than once, its last position wins.

        array -- any iterable of hashable entries.
        Returns a dict of entry -> index.
        """
        # Named ``priorities`` rather than ``map`` so the builtin is not shadowed.
        priorities = {}
        for position, entry in enumerate(array):
                priorities[entry] = position
        return priorities
82
def getdebug():
        """Return the ``debug`` attribute of the module-level ``config`` object."""
        debug_flag = config.debug
        return debug_flag
85
def print_stats(key, stats):
        """Print ``key`` and its integer value from ``stats`` on one line.

        Nothing is printed when ``key`` is not present in ``stats``.
        """
        if key not in stats:
                return
        line = "%20s : %d" % (key, stats[key])
        print(line)
88
class Merge(Thread):
        """Thread that merges findbad observations with previously recorded actions.

        For each hostname in the merge list that findbad reported, a node
        record is built and combined with the newest act_all entry for that
        host, when one exists.  The merged records are put on the toRT queue,
        followed by None as an end-of-stream marker.
        """

        def __init__(self, l_merge, toRT):
                """
                l_merge -- hostnames to consider; findbad nodes not listed are skipped.
                toRT    -- object with a put() method that receives the merged records.
                """
                Thread.__init__(self)
                self.toRT = toRT
                self.merge_list = l_merge
                # the hostname to loginbase mapping
                self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")

                # Previous actions taken on nodes, and the findbad observations.
                self.act_all = soltesz.if_cached_else(1, "act_all", lambda : {})
                self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {})

                # Obtained from the same "act_all" key as self.act_all; entries
                # that get merged with a findbad record are deleted from it.
                self.cache_all = soltesz.if_cached_else(1, "act_all", lambda : {})
                # sickdb[loginbase][nodename]  = record built from findbad
                # mergedb[loginbase][nodename] = record handed to toRT
                self.sickdb = {}
                self.mergedb = {}

        def run(self):
                """Run the three stages in order: collect, merge, publish."""
                # populate sickdb
                self.accumSickSites()
                # read data from findbad and act_all
                self.mergeActionsAndBadDB()
                # pass node_records to RT
                self.sendToRT()

        def accumSickSites(self):
                """
                Walk every node reported by findbad, keep those named in
                merge_list, and store a fresh record for each as:

                        sickdb[loginbase][nodename] = fb_record
                """
                fb_nodes = self.findbad['nodes']
                found = 0
                for hostname in fb_nodes.keys():
                        if hostname not in self.merge_list:
                                # not wanted in this run
                                continue

                        found += 1
                        site = self.plcdb_hn2lb[hostname]
                        observed = fb_nodes[hostname]['values']

                        fb_record = {
                                'nodename': hostname,
                                'category': observed['category'],
                                'state': observed['state'],
                                'comonstats': observed['comonstats'],
                                'plcnode': observed['plcnode'],
                                'kernel': self.getKernel(observed['kernel']),
                                'stage': "findbad",
                                'message': None,
                                'bootcd': observed['bootcd'],
                                'args': None,
                                'info': None,
                                'time': time.time(),
                                'date_created': time.time(),
                        }
                        self.sickdb.setdefault(site, {})[hostname] = fb_record

                print("Found %d nodes" % found)

        def getKernel(self, unamestr):
                """Return the third whitespace-separated field of unamestr, or ""
                when there are fewer than three fields."""
                fields = unamestr.split()
                try:
                        return fields[2]
                except IndexError:
                        return ""

        def _newProblemRecord(self, fb_record):
                """Copy fb_record and mark it as a new problem: empty ticket_id
                and no previous category."""
                fresh = {}
                fresh.update(fb_record)
                fresh['ticket_id'] = ""
                fresh['prev_category'] = None
                return fresh

        def mergeActionsAndBadDB(self):
                """
                Combine each sickdb record with the node's history in act_all
                and store the result in mergedb[loginbase][nodename].

                The possible situations are:
                1) Problem in findbad, nothing in act_all
                        A new problem; the findbad record is used with an
                        empty ticket_id.
                2) Problem in findbad, problem in act_all
                        The newest act_all entry is the base and the current
                        findbad observations are laid over it.  Whether the
                        problem got better or worse is not judged here; that
                        is left to the Diagnose stage.
                3) No problem in findbad, problem in act_all
                        The node is operational again according to findbad.
                        Such entries stay in cache_all and are only counted.
                4) No problem in findbad, nothing in act_all
                        There is no record in either db, so there is no code.
                """
                # look at all problems reported by findbad, in sorted order
                for loginbase in sorted(self.sickdb.keys()):
                        site_sick = self.sickdb[loginbase]
                        for nodename in sorted(site_sick.keys()):
                                fb_record = self.sickdb[loginbase][nodename]
                                site_merged = self.mergedb.setdefault(loginbase, {})

                                # We must compare findbad state with act_all state
                                if nodename not in self.act_all:
                                        # 1) ok, b/c it's a new problem. set ticket_id to null
                                        site_merged[nodename] = self._newProblemRecord(fb_record)
                                        continue

                                if len(self.act_all[nodename]) == 0:
                                        print("len(act_all[%s]) == 0, skipping %s %s" % (nodename, loginbase, nodename))
                                        continue

                                latest = self.act_all[nodename][0]

                                # An end-stage history means the old problem is
                                # finished, so this is handled like case 1.
                                if 'stage' in latest and "monitor-end-record" in latest['stage']:
                                        site_merged[nodename] = self._newProblemRecord(fb_record)
                                        continue

                                # NOTE: an earlier implementation translated legacy
                                #       'bucket' values into categories and reported
                                #       mismatches at this point; it is disabled.

                                # 2) start from the previous action, then overlay
                                #    what findbad currently observes.
                                merged = {}
                                site_merged[nodename] = merged
                                merged.update(latest)
                                for field in ('comonstats', 'category', 'state',
                                              'kernel', 'bootcd', 'plcnode'):
                                        merged[field] = fb_record[field]
                                # delete the entry from cache_all to keep it out of case 3)
                                del self.cache_all[nodename]

                # 3) nodes that remain in cache_all were not identified by findbad.
                #    Do we keep them or not?
                # TODO: the former handling of these entries did not work
                #       correctly and is disabled; decide whether it is needed.
                print("len of cache_all: %d" % len(self.cache_all))
                return

        def sendToRT(self):
                """Put every merged record on toRT, site by site in sorted
                order, then put None to signal the end of the stream."""
                for loginbase in sorted(self.mergedb.keys()):
                        for record in self.mergedb[loginbase].values():
                                self.toRT.put(record)

                # send signal to stop reading
                self.toRT.put(None)
                return
298
299 class Diagnose(Thread):
300         def __init__(self, fromRT):
301                 self.fromRT = fromRT
302                 self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
303                 self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {})
304
305                 self.diagnose_in = {}
306                 self.diagnose_out = {}
307                 Thread.__init__(self)
308
309
310         def run(self):
311                 self.accumSickSites()
312
313                 print "Accumulated %d sick sites" % len(self.diagnose_in.keys())
314                 logger.debug("Accumulated %d sick sites" % len(self.diagnose_in.keys()))
315
316                 try:
317                         stats = self.diagnoseAll()
318                 except Exception, err:
319                         print "----------------"
320                         import traceback
321                         print traceback.print_exc()
322                         print err
323                         #if config.policysavedb:
324                         sys.exit(1)
325
326                 print_stats("sites_observed", stats)
327                 print_stats("sites_diagnosed", stats)
328                 print_stats("nodes_diagnosed", stats)
329
330                 if config.policysavedb:
331                         print "Saving Databases... diagnose_out"
332                         soltesz.dbDump("diagnose_out", self.diagnose_out)
333
334         def accumSickSites(self):
335                 """
336                 Take all nodes, from l_diagnose, look them up in the diagnose_out database, 
337                 and insert them into diagnose_in[] as:
338
339                         diagnose_in[loginbase] = [diag_node1, diag_node2, ...]
340                 """
341                 while 1:
342                         node_record = self.fromRT.get(block = True)
343                         if node_record == None:
344                                 break;
345
346                         nodename = node_record['nodename']
347                         loginbase = self.plcdb_hn2lb[nodename]
348
349                         if loginbase not in self.diagnose_in:
350                                 self.diagnose_in[loginbase] = {}
351
352                         self.diagnose_in[loginbase][nodename] = node_record
353
354                 return
355
356         def diagnoseAll(self):
357                 i_sites_observed = 0
358                 i_sites_diagnosed = 0
359                 i_nodes_diagnosed = 0
360                 i_nodes_actedon = 0
361                 i_sites_emailed = 0
362                 l_allsites = []
363
364                 sorted_sites = self.diagnose_in.keys()
365                 sorted_sites.sort()
366                 self.diagnose_out= {}
367                 for loginbase in sorted_sites:
368                         l_allsites += [loginbase]
369
370                         d_diag_nodes = self.diagnose_in[loginbase]
371                         d_act_records = self.__diagnoseSite(loginbase, d_diag_nodes)
372                         # store records in diagnose_out, for saving later.
373                         self.diagnose_out.update(d_act_records)
374                         
375                         if len(d_act_records[loginbase]['nodes'].keys()) > 0:
376                                 i_nodes_diagnosed += (len(d_act_records[loginbase]['nodes'].keys()))
377                                 i_sites_diagnosed += 1
378                         i_sites_observed += 1
379
380                 return {'sites_observed': i_sites_observed, 
381                                 'sites_diagnosed': i_sites_diagnosed, 
382                                 'nodes_diagnosed': i_nodes_diagnosed, 
383                                 'allsites':l_allsites}
384
385                 pass
386                 
387         def __getDaysDown(self, diag_record, nodename):
388                 daysdown = -1
389                 if diag_record['comonstats']['sshstatus'] != "null":
390                         daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
391                 elif diag_record['comonstats']['lastcotop'] != "null":
392                         daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
393                 else:
394                         now = time.time()
395                         last_contact = diag_record['plcnode']['last_contact']
396                         if last_contact == None:
397                                 # the node has never been up, so give it a break
398                                 daysdown = -1
399                         else:
400                                 diff = now - last_contact
401                                 daysdown = diff // (60*60*24)
402                 return daysdown
403
404         def __getStrDaysDown(self, diag_record, nodename):
405                 daysdown = self.__getDaysDown(diag_record, nodename)
406                 if daysdown > 0:
407                         return "(%d days down)"%daysdown
408                 else:
409                         return "Unknown number of days"
410
411         def __getCDVersion(self, diag_record, nodename):
412                 cdversion = ""
413                 #print "Getting kernel for: %s" % diag_record['nodename']
414                 cdversion = diag_record['kernel']
415                 return cdversion
416
417         def __diagnoseSite(self, loginbase, d_diag_nodes):
418                 """
419                 d_diag_nodes are diagnose_in entries.
420                 """
421                 d_diag_site = {loginbase : { 'config' : 
422                                                                                                 {'squeeze': False,
423                                                                                                  'email': False
424                                                                                                 }, 
425                                                                         'nodes': {}
426                                                                         }
427                                            }
428                 sorted_nodes = d_diag_nodes.keys()
429                 sorted_nodes.sort()
430                 for nodename in sorted_nodes:
431                         node_record = d_diag_nodes[nodename]
432                         diag_record = self.__diagnoseNode(loginbase, node_record)
433
434                         if diag_record != None:
435                                 d_diag_site[loginbase]['nodes'][nodename] = diag_record
436
437                                 # NOTE: improvement means, we need to act/squeeze and email.
438                                 #print "DIAG_RECORD", diag_record
439                                 if 'monitor-end-record' in diag_record['stage'] or \
440                                    'nmreset' in diag_record['stage']:
441                                 #       print "resetting loginbase!" 
442                                         d_diag_site[loginbase]['config']['squeeze'] = True
443                                         d_diag_site[loginbase]['config']['email'] = True
444                                 #else:
445                                 #       print "NO IMPROVEMENT!!!!"
446                         else:
447                                 pass # there is nothing to do for this node.
448
449                 # NOTE: these settings can be overridden by command line arguments,
450                 #       or the state of a record, i.e. if already in RT's Support Queue.
451                 nodes_up = self.getUpAtSite(loginbase, d_diag_site)
452                 if nodes_up < MINUP:
453                         d_diag_site[loginbase]['config']['squeeze'] = True
454
455                 max_slices = self.getMaxSlices(loginbase)
456                 num_nodes = self.getNumNodes(loginbase)
457                 # NOTE: when max_slices == 0, this is either a new site (the old way)
458                 #       or an old disabled site from previous monitor (before site['enabled'])
459                 if nodes_up < num_nodes and max_slices != 0:
460                         d_diag_site[loginbase]['config']['email'] = True
461
462                 if len(d_diag_site[loginbase]['nodes'].keys()) > 0:
463                         print "SITE: %20s : %d nodes up, at most" % (loginbase, nodes_up)
464
465                 return d_diag_site
466
467         def diagRecordByCategory(self, node_record):
468                 nodename = node_record['nodename']
469                 category = node_record['category']
470                 state    = node_record['state']
471                 loginbase = self.plcdb_hn2lb[nodename]
472                 diag_record = None
473
474                 if  "ERROR" in category:        # i.e. "DOWN"
475                         diag_record = {}
476                         diag_record.update(node_record)
477                         daysdown = self.__getDaysDown(diag_record, nodename) 
478                         if daysdown < 7:
479                                 format = "DIAG: %20s : %-40s Down only %s days  NOTHING DONE"
480                                 print format % (loginbase, nodename, daysdown)
481                                 return None
482
483                         s_daysdown = self.__getStrDaysDown(diag_record, nodename)
484                         diag_record['message'] = emailTxt.mailtxt.newdown
485                         diag_record['args'] = {'nodename': nodename}
486                         diag_record['info'] = (nodename, s_daysdown, "")
487                         if diag_record['ticket_id'] == "":
488                                 diag_record['log'] = "DOWN: %20s : %-40s == %20s %s" % \
489                                         (loginbase, nodename, diag_record['info'][1:], diag_record['found_rt_ticket'])
490                         else:
491                                 diag_record['log'] = "DOWN: %20s : %-40s == %20s %s" % \
492                                         (loginbase, nodename, diag_record['info'][1:], diag_record['ticket_id'])
493
494                 elif "OLDBOOTCD" in category:
495                         # V2 boot cds as determined by findbad
496                         s_daysdown = self.__getStrDaysDown(node_record, nodename)
497                         s_cdversion = self.__getCDVersion(node_record, nodename)
498                         diag_record = {}
499                         diag_record.update(node_record)
500                         #if "2.4" in diag_record['kernel'] or "v2" in diag_record['bootcd']:
501                         diag_record['message'] = emailTxt.mailtxt.newbootcd
502                         diag_record['args'] = {'nodename': nodename}
503                         diag_record['info'] = (nodename, s_daysdown, s_cdversion)
504                         if diag_record['ticket_id'] == "":
505                                 diag_record['log'] = "BTCD: %20s : %-40s == %20s %20s %s" % \
506                                                                         (loginbase, nodename, diag_record['kernel'], 
507                                                                          diag_record['bootcd'], diag_record['found_rt_ticket'])
508                         else:
509                                 diag_record['log'] = "BTCD: %20s : %-40s == %20s %20s %s" % \
510                                                                         (loginbase, nodename, diag_record['kernel'], 
511                                                                          diag_record['bootcd'], diag_record['ticket_id'])
512
513                 elif "PROD" in category:
514                         if "DEBUG" in state:
515                                 # Not sure what to do with these yet.  Probably need to
516                                 # reboot, and email.
517                                 print "DEBG: %20s : %-40s  NOTHING DONE" % (loginbase, nodename)
518                                 return None
519                         elif "BOOT" in state:
520                                 # no action needed.
521                                 # TODO: remove penalties, if any are applied.
522                                 now = time.time()
523                                 last_contact = node_record['plcnode']['last_contact']
524                                 if last_contact == None:
525                                         time_diff = 0
526                                 else:
527                                         time_diff = now - last_contact;
528
529                                 if 'improvement' in node_record['stage']:
530                                         # then we need to pass this on to 'action'
531                                         diag_record = {}
532                                         diag_record.update(node_record)
533                                         diag_record['message'] = emailTxt.mailtxt.newthankyou
534                                         diag_record['args'] = {'nodename': nodename}
535                                         diag_record['info'] = (nodename, node_record['prev_category'], 
536                                                                                                          node_record['category'])
537                                         if diag_record['ticket_id'] == "":
538                                                 diag_record['log'] = "IMPR: %20s : %-40s == %20s %20s %s %s" % \
539                                                                         (loginbase, nodename, diag_record['stage'], 
540                                                                          state, category, diag_record['found_rt_ticket'])
541                                         else:
542                                                 diag_record['log'] = "IMPR: %20s : %-40s == %20s %20s %s %s" % \
543                                                                         (loginbase, nodename, diag_record['stage'], 
544                                                                          state, category, diag_record['ticket_id'])
545                                         return diag_record
546                                 elif time_diff >= 6*SPERHOUR:
547                                         # heartbeat is older than 30 min.
548                                         # then reset NM.
549                                         #print "Possible NM problem!! %s - %s = %s" % (now, last_contact, time_diff)
550                                         diag_record = {}
551                                         diag_record.update(node_record)
552                                         diag_record['message'] = emailTxt.mailtxt.NMReset
553                                         diag_record['args'] = {'nodename': nodename}
554                                         diag_record['stage'] = "nmreset"
555                                         diag_record['info'] = (nodename, 
556                                                                                         node_record['prev_category'], 
557                                                                                         node_record['category'])
558                                         if diag_record['ticket_id'] == "":
559                                                 diag_record['log'] = "NM  : %20s : %-40s == %20s %20s %s %s" % \
560                                                                         (loginbase, nodename, diag_record['stage'], 
561                                                                          state, category, diag_record['found_rt_ticket'])
562                                         else:
563                                                 diag_record['log'] = "NM  : %20s : %-40s == %20s" % \
564                                                                         (loginbase, nodename, diag_record['stage'])
565
566                                         return diag_record
567                                 else:
568                                         return None
569                         else:
570                                 # unknown
571                                 pass
572                 elif "ALPHA"    in category:
573                         pass
574                 elif "clock_drift" in category:
575                         pass
576                 elif "dns"    in category:
577                         pass
578                 elif "filerw"    in category:
579                         pass
580                 else:
581                         print "Unknown category!!!! %s" % category
582                         sys.exit(1)
583
584                 return diag_record
585
586         def __diagnoseNode(self, loginbase, node_record):
587                 # TODO: change the format of the hostname in this 
588                 #               record to something more natural.
589                 nodename          = node_record['nodename']
590                 category          = node_record['category']
591                 prev_category = node_record['prev_category']
592                 state             = node_record['state']
593
594                 val = cmpCategoryVal(category, prev_category)
595                 if val == -1:
596                         # current category is worse than previous, carry on
597                         pass
598                 elif val == 1:
599                         # current category is better than previous
600                         # TODO: too generous for now, but will be handled correctly
601                         # TODO: if stage is currently ticket_waitforever, 
602                         if 'ticket_id' not in node_record:
603                                 print "ignoring: ", node_record['nodename']
604                                 return None
605                         else:
606                                 if node_record['ticket_id'] == "" or \
607                                    node_record['ticket_id'] == None:
608                                         print "closing: ", node_record['nodename']
609                                         node_record['action'] = ['close_rt']
610                                         node_record['message'] = None
611                                         node_record['stage'] = 'monitor-end-record'
612                                         return node_record
613                                         #return None
614                                 else:
615                                         node_record['stage'] = 'improvement'
616                 else:
617                         #values are equal, carry on.
618                         pass
619                         
620                 #### COMPARE category and prev_category
621                 # if not_equal
622                 #       then assign a stage based on relative priorities
623                 # else equal
624                 #       then check category for stats.
625                 diag_record = self.diagRecordByCategory(node_record)
626                 if diag_record == None:
627                         return None
628
629                 #### found_RT_ticket
630                 # TODO: need to record time found, and maybe add a stage for acting on it...
631                 if 'found_rt_ticket' in diag_record and \
632                         diag_record['found_rt_ticket'] is not None:
633                         if diag_record['stage'] is not 'improvement':
634                                 diag_record['stage'] = 'ticket_waitforever'
635                                 
636                 current_time = time.time()
637                 # take off four days, for the delay that database caused.
638                 # TODO: generalize delays at PLC, and prevent enforcement when there
639                 #               have been no emails.
640                 # NOTE: 7*SPERDAY exists to offset the 'bad week'
641                 #delta = current_time - diag_record['time'] - 7*SPERDAY
642                 delta = current_time - diag_record['time']
643
644                 message = diag_record['message']
645                 act_record = {}
646                 act_record.update(diag_record)
647
648                 #### DIAGNOSE STAGES 
649                 if   'findbad' in diag_record['stage']:
650                         # The node is bad, and there's no previous record of it.
651                         act_record['email'] = TECH
652                         act_record['action'] = ['noop']
653                         act_record['message'] = message[0]
654                         act_record['stage'] = 'stage_actinoneweek'
655
656                 elif 'nmreset' in diag_record['stage']:
657                         act_record['email']  = ADMIN 
658                         act_record['action'] = ['reset_nodemanager']
659                         act_record['message'] = message[0]
660                         act_record['stage']  = 'nmreset'
661                         return None
662                         
663                 elif 'improvement' in diag_record['stage']:
664                         # - backoff previous squeeze actions (slice suspend, nocreate)
665                         # TODO: add a backoff_squeeze section... Needs to runthrough
666                         act_record['action'] = ['close_rt']
667                         act_record['message'] = message[0]
668                         act_record['stage'] = 'monitor-end-record'
669
670                 elif 'actinoneweek' in diag_record['stage']:
671                         if delta >= 7 * SPERDAY: 
672                                 act_record['email'] = TECH | PI
673                                 act_record['stage'] = 'stage_actintwoweeks'
674                                 act_record['message'] = message[1]
675                                 act_record['action'] = ['nocreate' ]
676                                 act_record['time'] = current_time               # reset clock for waitforever
677                         elif delta >= 3* SPERDAY and not 'second-mail-at-oneweek' in act_record:
678                                 act_record['email'] = TECH 
679                                 act_record['message'] = message[0]
680                                 act_record['action'] = ['sendmailagain-waitforoneweekaction' ]
681                                 act_record['second-mail-at-oneweek'] = True
682                         else:
683                                 act_record['message'] = None
684                                 act_record['action'] = ['waitforoneweekaction' ]
685                                 return None                     # don't send if there's no action
686
687                 elif 'actintwoweeks' in diag_record['stage']:
688                         if delta >= 7 * SPERDAY:
689                                 act_record['email'] = TECH | PI | USER
690                                 act_record['stage'] = 'stage_waitforever'
691                                 act_record['message'] = message[2]
692                                 act_record['action'] = ['suspendslices']
693                                 act_record['time'] = current_time               # reset clock for waitforever
694                         elif delta >= 3* SPERDAY and not 'second-mail-at-twoweeks' in act_record:
695                                 act_record['email'] = TECH | PI
696                                 act_record['message'] = message[1]
697                                 act_record['action'] = ['sendmailagain-waitfortwoweeksaction' ]
698                                 act_record['second-mail-at-twoweeks'] = True
699                         else:
700                                 act_record['message'] = None
701                                 act_record['action'] = ['waitfortwoweeksaction']
702                                 return None                     # don't send if there's no action
703
704                 elif 'ticket_waitforever' in diag_record['stage']:
705                         act_record['email'] = TECH
706                         if 'first-found' not in act_record:
707                                 act_record['first-found'] = True
708                                 act_record['log'] += " firstfound"
709                                 act_record['action'] = ['ticket_waitforever']
710                                 act_record['message'] = None
711                                 act_record['time'] = current_time
712                         else:
713                                 if delta >= 7*SPERDAY:
714                                         act_record['action'] = ['ticket_waitforever']
715                                         act_record['message'] = None
716                                         act_record['time'] = current_time               # reset clock
717                                 else:
718                                         act_record['action'] = ['ticket_waitforever']
719                                         act_record['message'] = None
720                                         return None
721
722                 elif 'waitforever' in diag_record['stage']:
723                         # more than 3 days since last action
724                         # TODO: send only on weekdays.
725                         # NOTE: expects that 'time' has been reset before entering waitforever stage
726                         if delta >= 3*SPERDAY:
727                                 act_record['action'] = ['email-againwaitforever']
728                                 act_record['message'] = message[2]
729                                 act_record['time'] = current_time               # reset clock
730                         else:
731                                 act_record['action'] = ['waitforever']
732                                 act_record['message'] = None
733                                 return None                     # don't send if there's no action
734
735                 else:
736                         # There is no action to be taken, possibly b/c the stage has
737                         # already been performed, but diagnose picked it up again.
738                         # two cases, 
739                         #       1. stage is unknown, or 
740                         #       2. delta is not big enough to bump it to the next stage.
741                         # TODO: figure out which. for now assume 2.
742                         print "UNKNOWN!?!? %s" % nodename
743                         act_record['action'] = ['unknown']
744                         act_record['message'] = message[0]
745                         print "Exiting..."
746                         sys.exit(1)
747
748                 print "%s" % act_record['log'],
749                 print "%15s" % act_record['action']
750                 return act_record
751
752         def getMaxSlices(self, loginbase):
753                 # if sickdb has a loginbase, then it will have at least one node.
754                 site_stats = None
755
756                 for nodename in self.diagnose_in[loginbase].keys():
757                         if nodename in self.findbad['nodes']:
758                                 site_stats = self.findbad['nodes'][nodename]['values']['plcsite']
759                                 break
760
761                 if site_stats == None:
762                         raise Exception, "loginbase with no nodes in findbad"
763                 else:
764                         return site_stats['max_slices']
765
766         def getNumNodes(self, loginbase):
767                 # if sickdb has a loginbase, then it will have at least one node.
768                 site_stats = None
769
770                 for nodename in self.diagnose_in[loginbase].keys():
771                         if nodename in self.findbad['nodes']:
772                                 site_stats = self.findbad['nodes'][nodename]['values']['plcsite']
773                                 break
774
775                 if site_stats == None:
776                         raise Exception, "loginbase with no nodes in findbad"
777                 else:
778                         return site_stats['num_nodes']
779
780         """
781         Returns number of up nodes as the total number *NOT* in act_all with a
782         stage other than 'steady-state' .
783         """
784         def getUpAtSite(self, loginbase, d_diag_site):
785                 # TODO: THIS DOESN"T WORK!!! it misses all the 'debug' state nodes
786                 #               that aren't recorded yet.
787
788                 numnodes = self.getNumNodes(loginbase)
789                 # NOTE: assume nodes we have no record of are ok. (too conservative)
790                 # TODO: make the 'up' value more representative
791                 up = numnodes
792                 for nodename in d_diag_site[loginbase]['nodes'].keys():
793
794                         rec = d_diag_site[loginbase]['nodes'][nodename]
795                         if rec['stage'] != 'monitor-end-record':
796                                 up -= 1
797                         else:
798                                 pass # the node is assumed to be up.
799
800                 #if up != numnodes:
801                 #       print "ERROR: %s total nodes up and down != %d" % (loginbase, numnodes)
802
803                 return up
804
805
class SiteAction:
        """
        Base class for an action that is run with a dict of arguments.

        run() first verifies that every name in parameter_names is present in
        the args dict and then delegates to _run(), which subclasses override.
        """
        def __init__(self, parameter_names=None):
                # Build the default list per instance: a list literal in the
                # signature would be one object shared by every SiteAction.
                if parameter_names is None:
                        parameter_names = ['hostname', 'ticket_id']
                self.parameter_names = parameter_names

        def checkParam(self, args):
                """Raise Exception naming the first required parameter missing from args."""
                for param in self.parameter_names:
                        if param not in args:
                                raise Exception("Parameter %s not provided in args"%param)

        def run(self, args):
                """Validate args, then return whatever _run(args) returns."""
                self.checkParam(args)
                return self._run(args)

        def _run(self, args):
                """Do nothing; subclasses supply the real work."""
                pass
818
class SuspendAction(SiteAction):
        """SiteAction whose work is a call to plc.suspendSlices()."""
        def _run(self, args):
                # The PLC call's result is handed straight back to the caller.
                suspend = plc.suspendSlices
                return suspend(args['hostname'])
822
class RemoveSliceCreation(SiteAction):
        """SiteAction whose work is a call to plc.removeSliceCreation()."""
        def _run(self, args):
                # The PLC call's result is handed straight back to the caller.
                remove_creation = plc.removeSliceCreation
                return remove_creation(args['hostname'])
826
class BackoffActions(SiteAction):
        """SiteAction that calls plc.enableSlices() and plc.enableSliceCreation()."""
        def _run(self, args):
                # Same order as before: slices first, then slice creation.
                enable_slices = plc.enableSlices
                enable_slices(args['hostname'])
                enable_creation = plc.enableSliceCreation
                enable_creation(args['hostname'])
                return True
832
833 # TODO: create class for each action below, 
834 #               allow for lists of actions to be performed...
835
def close_rt_backoff(args):
        """
        Close the RT ticket named in args and re-enable the host's slices.

        Does nothing when args has no 'ticket_id' or when it is "" or None.
        Otherwise closes the ticket through mailer.closeTicketViaRT() and calls
        plc.enableSlices() and plc.enableSliceCreation() with args['hostname'].
        Always returns None.
        """
        ticket_id = args.get('ticket_id')
        if ticket_id is None or ticket_id == "":
                # No ticket to close, so nothing is backed off either.
                return

        mailer.closeTicketViaRT(ticket_id,
                                "Ticket CLOSED automatically by SiteAssist.")
        hostname = args['hostname']
        plc.enableSlices(hostname)
        plc.enableSliceCreation(hostname)
        return
843
def reboot_node(args):
        """
        Reboot the node named by args['hostname'].

        Returns whatever reboot.reboot_new(hostname, True, config.debug)
        returns.
        """
        hostname = args['hostname']
        power_cycle = reboot.reboot_new
        return power_cycle(hostname, True, config.debug)
847
def reset_nodemanager(args):
        """
        Restart the NodeManager service on a node over ssh.

        args -- dict whose 'hostname' entry names the node, as for the other
                action functions in this file.

        Returns None; the exit status of ssh is not inspected.  Raises KeyError
        when args has no 'hostname'.
        """
        # Imported here so the function does not depend on the module's
        # import block providing it.
        import subprocess

        host = args['hostname']
        try:
                # An argument list (no shell) keeps an odd hostname from being
                # interpreted by the local shell.
                subprocess.call(["ssh", "root@%s" % host,
                                 "/sbin/service", "nm", "restart"])
        except OSError:
                # ssh itself could not be started; stay best-effort like the
                # os.system() call this replaces.
                print("reset_nodemanager: unable to run ssh for %s" % host)
        return
851
852 class Action(Thread):
853         def __init__(self, l_action):
854                 self.l_action = l_action
855
856                 # the hostname to loginbase mapping
857                 self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
858
859                 # Actions to take.
860                 self.diagnose_db = soltesz.if_cached_else(1, "diagnose_out", lambda : {})
861                 # Actions taken.
862                 self.act_all   = soltesz.if_cached_else(1, "act_all", lambda : {})
863
864                 # A dict of actions to specific functions. PICKLE doesnt' like lambdas.
865                 self.actions = {}
866                 self.actions['suspendslices'] = lambda args: plc.suspendSlices(args['hostname'])
867                 self.actions['nocreate'] = lambda args: plc.removeSliceCreation(args['hostname'])
868                 self.actions['close_rt'] = lambda args: close_rt_backoff(args)
869                 self.actions['rins'] = lambda args: plc.nodeBootState(args['hostname'], "rins") 
870                 self.actions['noop'] = lambda args: args
871                 self.actions['reboot_node'] = lambda args: reboot_node(args)
872                 self.actions['reset_nodemanager'] = lambda args: args # reset_nodemanager(args)
873
874                 self.actions['ticket_waitforever'] = lambda args: args
875                 self.actions['waitforever'] = lambda args: args
876                 self.actions['unknown'] = lambda args: args
877                 self.actions['waitforoneweekaction'] = lambda args: args
878                 self.actions['waitfortwoweeksaction'] = lambda args: args
879                 self.actions['sendmailagain-waitforoneweekaction'] = lambda args: args
880                 self.actions['sendmailagain-waitfortwoweeksaction'] = lambda args: args
881                 self.actions['email-againwaitforever'] = lambda args: args
882                 self.actions['email-againticket_waitforever'] = lambda args: args
883                                 
884
885                 self.sickdb = {}
886                 Thread.__init__(self)
887
888         def run(self):
889                 self.accumSites()
890                 print "Accumulated %d sick sites" % len(self.sickdb.keys())
891                 logger.debug("Accumulated %d sick sites" % len(self.sickdb.keys()))
892
893                 try:
894                         stats = self.analyseSites()
895                 except Exception, err:
896                         print "----------------"
897                         import traceback
898                         print traceback.print_exc()
899                         print err
900                         if config.policysavedb:
901                                 print "Saving Databases... act_all"
902                                 soltesz.dbDump("act_all", self.act_all)
903                         sys.exit(1)
904
905                 print_stats("sites_observed", stats)
906                 print_stats("sites_diagnosed", stats)
907                 print_stats("nodes_diagnosed", stats)
908                 print_stats("sites_emailed", stats)
909                 print_stats("nodes_actedon", stats)
910                 print string.join(stats['allsites'], ",")
911
912                 if config.policysavedb:
913                         print "Saving Databases... act_all"
914                         #soltesz.dbDump("policy.eventlog", self.eventlog)
915                         # TODO: remove 'diagnose_out', 
916                         #       or at least the entries that were acted on.
917                         soltesz.dbDump("act_all", self.act_all)
918
919         def accumSites(self):
920                 """
921                 Take all nodes, from l_action, look them up in the diagnose_db database, 
922                 and insert them into sickdb[] as:
923
924                 This way only the given l_action nodes will be acted on regardless
925                 of how many from diagnose_db are available.
926
927                         sickdb[loginbase][nodename] = diag_record
928                 """
929                 # TODO: what if l_action == None ?
930                 for nodename in self.l_action:
931
932                         loginbase = self.plcdb_hn2lb[nodename]
933
934                         if loginbase in self.diagnose_db and \
935                                 nodename in self.diagnose_db[loginbase]['nodes']:
936
937                                 diag_record = self.diagnose_db[loginbase]['nodes'][nodename]
938
939                                 if loginbase not in self.sickdb:
940                                         self.sickdb[loginbase] = {'nodes' : {}}
941
942                                 # NOTE: don't copy all node records, since not all will be in l_action
943                                 self.sickdb[loginbase]['nodes'][nodename] = diag_record
944                                 # NOTE: but, we want to get the loginbase config settings, 
945                                 #               this is the easiest way.
946                                 self.sickdb[loginbase]['config'] = self.diagnose_db[loginbase]['config']
947                         #else:
948                                 #print "%s not in diagnose_db!!" % loginbase
949                 return
950
951         def __emailSite(self, loginbase, roles, message, args):
952                 """
953                 loginbase is the unique site abbreviation, prepended to slice names.
954                 roles contains TECH, PI, USER roles, and derive email aliases.
955                 record contains {'message': [<subj>,<body>], 'args': {...}} 
956                 """
957                 ticket_id = 0
958                 args.update({'loginbase':loginbase})
959
960                 if not config.mail and not config.debug and config.bcc:
961                         roles = ADMIN
962                 if config.mail and config.debug:
963                         roles = ADMIN
964
965                 # build targets
966                 contacts = []
967                 if ADMIN & roles:
968                         contacts += [config.email]
969                 if TECH & roles:
970                         contacts += [TECHEMAIL % loginbase]
971                 if PI & roles:
972                         contacts += [PIEMAIL % loginbase]
973                 if USER & roles:
974                         slices = plc.slices(loginbase)
975                         if len(slices) >= 1:
976                                 for slice in slices:
977                                         contacts += [SLICEMAIL % slice]
978                                 print "SLIC: %20s : %d slices" % (loginbase, len(slices))
979                         else:
980                                 print "SLIC: %20s : 0 slices" % loginbase
981
982                 try:
983                         subject = message[0] % args
984                         body = message[1] % args
985                         if ADMIN & roles:
986                                 # send only to admin
987                                 if 'ticket_id' in args:
988                                         subj = "Re: [PL #%s] %s" % (args['ticket_id'], subject)
989                                 else:
990                                         subj = "Re: [PL noticket] %s" % subject
991                                 mailer.email(subj, body, contacts)
992                                 ticket_id = args['ticket_id']
993                         else:
994                                 ticket_id = mailer.emailViaRT(subject, body, contacts, args['ticket_id'])
995                 except Exception, err:
996                         print "exception on message:"
997                         import traceback
998                         print traceback.print_exc()
999                         print message
1000
1001                 return ticket_id
1002
1003
1004         def _format_diaginfo(self, diag_node):
1005                 info = diag_node['info']
1006                 if diag_node['stage'] == 'monitor-end-record':
1007                         hlist = "    %s went from '%s' to '%s'\n" % (info[0], info[1], info[2]) 
1008                 else:
1009                         hlist = "    %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
1010                 return hlist
1011
1012
1013         def get_email_args(self, act_recordlist):
1014
1015                 email_args = {}
1016                 email_args['hostname_list'] = ""
1017
1018                 for act_record in act_recordlist:
1019                         email_args['hostname_list'] += act_record['msg_format']
1020                         email_args['hostname'] = act_record['nodename']
1021                         if  'plcnode' in act_record and \
1022                                 'pcu_ids' in act_record['plcnode'] and \
1023                                 len(act_record['plcnode']['pcu_ids']) > 0:
1024                                 print "setting 'pcu_id' for email_args %s"%email_args['hostname']
1025                                 email_args['pcu_id'] = act_record['plcnode']['pcu_ids'][0]
1026                         else:
1027                                 email_args['pcu_id'] = "-1"
1028                                         
1029                         if 'ticket_id' in act_record:
1030                                 email_args['ticket_id'] = act_record['ticket_id']
1031
1032                 return email_args
1033
1034         def get_unique_issues(self, act_recordlist):
1035                 # NOTE: only send one email per site, per problem...
1036                 unique_issues = {}
1037                 for act_record in act_recordlist:
1038                         act_key = act_record['action'][0]
1039                         if act_key not in unique_issues:
1040                                 unique_issues[act_key] = []
1041                                 
1042                         unique_issues[act_key] += [act_record]
1043                         
1044                 return unique_issues
1045                         
1046
1047         def __actOnSite(self, loginbase, site_record):
1048                 i_nodes_actedon = 0
1049                 i_nodes_emailed = 0
1050
1051                 act_recordlist = []
1052
1053                 for nodename in site_record['nodes'].keys():
1054                         diag_record = site_record['nodes'][nodename]
1055                         act_record  = self.__actOnNode(diag_record)
1056                         #print "nodename: %s %s" % (nodename, act_record)
1057                         if act_record is not None:
1058                                 act_recordlist += [act_record]
1059
1060                 unique_issues = self.get_unique_issues(act_recordlist)
1061
1062                 for issue in unique_issues.keys():
1063                         print "\tworking on issue: %s" % issue
1064                         issue_record_list = unique_issues[issue]
1065                         email_args = self.get_email_args(issue_record_list)
1066
1067                         # for each record.
1068                         for act_record in issue_record_list:
1069                                 # if there's a pcu record and email config is set
1070                                 if 'email_pcu' in act_record:
1071                                         if act_record['email_pcu'] and \
1072                                                 site_record['config']['email']:
1073
1074                                                 ticket_id = self.__emailSite(loginbase, 
1075                                                                                         act_record['email'], 
1076                                                                                         emailTxt.mailtxt.pcudown[0],
1077                                                                                         email_args)
1078                                                 email_args['ticket_id'] = ticket_id
1079
1080                         
1081                         act_record = issue_record_list[0]
1082                         # send message before squeezing
1083                         print "\t\tconfig.email: %s and %s" % (act_record['message'] != None, 
1084                                                                                                 site_record['config']['email'])
1085                         if act_record['message'] != None and site_record['config']['email']:
1086                                 ticket_id = self.__emailSite(loginbase, act_record['email'], 
1087                                                                                          act_record['message'], email_args)
1088
1089                                 # Add ticket_id to ALL nodenames
1090                                 for act_record in issue_record_list:
1091                                         nodename = act_record['nodename']
1092                                         # update node record with RT ticket_id
1093                                         if nodename in self.act_all:
1094                                                 self.act_all[nodename][0]['ticket_id'] = "%s" % ticket_id
1095                                         if config.mail: i_nodes_emailed += 1
1096
1097                         print "\t\tconfig.squeeze: %s and %s" % (config.squeeze,
1098                                                                                                         site_record['config']['squeeze'])
1099                         if config.squeeze and site_record['config']['squeeze']:
1100                                 for act_key in act_record['action']:
1101                                         self.actions[act_key](email_args)
1102                                 i_nodes_actedon += 1
1103                 
1104                 if config.policysavedb:
1105                         print "Saving Databases... act_all, diagnose_out"
1106                         soltesz.dbDump("act_all", self.act_all)
1107                         # remove site record from diagnose_out, it's in act_all as done.
1108                         del self.diagnose_db[loginbase]
1109                         soltesz.dbDump("diagnose_out", self.diagnose_db)
1110
1111                 #print "sleeping for 1 sec"
1112                 #time.sleep(1)
1113                 #print "Hit enter to continue..."
1114                 sys.stdout.flush()
1115                 line = sys.stdin.readline()
1116
1117                 return (i_nodes_actedon, i_nodes_emailed)
1118
1119         def __actOnNode(self, diag_record):
1120                 nodename = diag_record['nodename']
1121                 message = diag_record['message']
1122
1123                 act_record = {}
1124                 act_record.update(diag_record)
1125                 act_record['nodename'] = nodename
1126                 act_record['msg_format'] = self._format_diaginfo(diag_record)
1127
1128
1129                 if "DOWN" in act_record['log']:
1130                         print "%s" % act_record['log'],
1131                         print "%15s" % (['reboot_node'],)
1132                         ret = reboot_node(act_record['nodename'])
1133                         if ret:
1134                                 # Reboot Succeeded
1135                                 act_record2 = {}
1136                                 act_record2.update(act_record)
1137                                 act_record2['action'] = ['reboot_node']
1138                                 act_record2['reboot_node_failed'] = False
1139                                 act_record2['email_pcu'] = False
1140
1141                                 if nodename not in self.act_all: 
1142                                         self.act_all[nodename] = []
1143                                 self.act_all[nodename].insert(0,act_record2)
1144
1145                                 # return None to avoid further action
1146                                 return None
1147                         else:
1148                                 # set email_pcu to also send pcu notice for this record.
1149                                 act_record['reboot_node_failed'] = True
1150                                 act_record['email_pcu'] = True
1151
1152                 print "%s" % act_record['log'],
1153                 print "%15s" % act_record['action']
1154
1155                 if act_record['stage'] is not 'monitor-end-record' and \
1156                    act_record['stage'] is not 'nmreset':
1157                         if nodename not in self.act_all: 
1158                                 self.act_all[nodename] = []
1159
1160                         self.act_all[nodename].insert(0,act_record)
1161                 else:
1162                         print "Not recording %s in act_all" % nodename
1163
1164                 return act_record
1165
1166         def analyseSites(self):
1167                 i_sites_observed = 0
1168                 i_sites_diagnosed = 0
1169                 i_nodes_diagnosed = 0
1170                 i_nodes_actedon = 0
1171                 i_sites_emailed = 0
1172                 l_allsites = []
1173
1174                 sorted_sites = self.sickdb.keys()
1175                 sorted_sites.sort()
1176                 for loginbase in sorted_sites:
1177                         site_record = self.sickdb[loginbase]
1178                         print "sites: %s" % loginbase
1179                         
1180                         i_nodes_diagnosed += len(site_record.keys())
1181                         i_sites_diagnosed += 1
1182
1183                         (na,ne) = self.__actOnSite(loginbase, site_record)
1184
1185                         i_sites_observed += 1
1186                         i_nodes_actedon += na
1187                         i_sites_emailed += ne
1188
1189                         l_allsites += [loginbase]
1190
1191                 return {'sites_observed': i_sites_observed, 
1192                                 'sites_diagnosed': i_sites_diagnosed, 
1193                                 'nodes_diagnosed': i_nodes_diagnosed, 
1194                                 'sites_emailed': i_sites_emailed, 
1195                                 'nodes_actedon': i_nodes_actedon, 
1196                                 'allsites':l_allsites}
1197
1198         def print_stats(self, key, stats):
1199                 print "%20s : %d" % (key, stats[key])
1200
1201
1202
1203         #"""
1204         #Prints, logs, and emails status of up nodes, down nodes, and buckets.
1205         #"""
1206         #def status(self):
1207         #       sub = "Monitor Summary"
1208         #       msg = "\nThe following nodes were acted upon:  \n\n"
1209         #       for (node, (type, date)) in self.emailed.items():
1210         #               # Print only things acted on today.
1211         #               if (time.gmtime(time.time())[2] == time.gmtime(date)[2]):
1212         #                       msg +="%s\t(%s)\t%s\n" %(node, type, time.ctime(date))
1213         #       msg +="\n\nThe following sites have been 'squeezed':\n\n"
1214         #       for (loginbase, (date, type)) in self.squeezed.items():
1215         #               # Print only things acted on today.
1216         #               if (time.gmtime(time.time())[2] == time.gmtime(date)[2]):
1217         #                       msg +="%s\t(%s)\t%s\n" %(loginbase, type, time.ctime(date))
1218         #       mailer.email(sub, msg, [SUMTO])
1219         #       logger.info(msg)
1220         #       return 
1221
1222         #"""
1223         #Store/Load state of emails.  When, where, what.
1224         #"""
1225         #def emailedStore(self, action):
1226         #       try:
1227         #               if action == "LOAD":
1228         #                       f = open(DAT, "r+")
1229         #                       logger.info("POLICY:  Found and reading " + DAT)
1230         #                       self.emailed.update(pickle.load(f))
1231         #               if action == "WRITE":
1232         #                       f = open(DAT, "w")
1233         #                       #logger.debug("Writing " + DAT)
1234         #                       pickle.dump(self.emailed, f)
1235         #               f.close()
1236         #       except Exception, err:
1237         #               logger.info("POLICY:  Problem with DAT, %s" %err)
1238
1239
1240 #class Policy(Thread):
1241
def main():
        """Print a notice that this file is a module, not a runnable script."""
        notice = "policy.py is a module, not a script for running directly."
        print(notice)
1244
if __name__ == '__main__':
        # Running this file directly only prints the notice from main().
        # Ctrl-C is reported on stdout and in the "monitor" log, then the
        # process is terminated immediately with exit status 0.
        import os
        import plc
        try:
                main()
        except KeyboardInterrupt:
                print("Killed.  Exitting.")
                logger.info('Monitor Killed')
                # os._exit() skips interpreter cleanup: stdio buffers are not
                # flushed and exit handlers do not run.  Flush explicitly so
                # the two messages above are not lost when output is buffered.
                sys.stdout.flush()
                logging.shutdown()
                os._exit(0)