Checks whether NM is still alive by querying it over XML-RPC. If the query times out, restarts NM.
author Faiyaz Ahmed <faiyaza@cs.princeton.edu>
Thu, 17 Aug 2006 20:05:24 +0000 (20:05 +0000)
committer Faiyaz Ahmed <faiyaza@cs.princeton.edu>
Thu, 17 Aug 2006 20:05:24 +0000 (20:05 +0000)
nm_mom.py [new file with mode: 0755]

diff --git a/nm_mom.py b/nm_mom.py
new file mode 100755 (executable)
index 0000000..4484a33
--- /dev/null
+++ b/nm_mom.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python
+# 
+# Restarts NM if NM does not respond to periodic probes.
+#
+# Faiyaz Ahmed <faiyaza@cs.princeton.edu>
+# Copyright (C) 2006 The Trustees of Princeton University
+#
+# $Id: $
+# 
+
+import syslog
+import os
+import sys
+import getopt
+import time
+import pickle
+
+import socket
+import xmlrpclib
+import bwlimit
+
+from sets import Set
+
+# Utility functions
+from pl_mom import *
+
+
+def restartNM():
+       try:
+               pid = os.fork()
+                if pid == 0:
+                       bwlimit.run("/etc/init.d/pl_nm stop")
+                       try:
+                               nmpid = bwlimit.run("/bin/ps -e -o pid,cmd | grep nm.py")[0].split(" ")[0]
+                               os.kill(int(nmpid), 9)
+                               time.sleep(3)
+                               print "NM  %s forcefully killed." %nmpid
+                       except:
+                               print "NM %s killed." %nmpid
+                       bwlimit.run("/etc/init.d/pl_nm restart")
+                       print "NM restarted."
+                       os._exit(0)
+                else:
+                        os.waitpid(pid,0)
+        except Exception, err:
+                print "Warning: Exception received while restarting NM:  %s" % err
+
+
+def main():
+        syslog.openlog("nm_mom")
+        sys.stdout = sys.stderr = Logger()
+       
+       nm = NM()
+       last_maxrate = 0 
+       try:
+               # Query something.  If it doesnt come back...
+                vals = nm.query("pl_netflow", [('nm_net_max_rate', last_maxrate)])
+
+        except Exception, err:
+                print "Warning: Exception received while querying NM:", err
+               restartNM()
+
+if __name__ == '__main__':
+    main()