Kill orphaned NM procs in slices. pl-41rc2wStorkHack
author Faiyaz Ahmed <faiyaza@cs.princeton.edu>
Mon, 18 Feb 2008 18:41:34 +0000 (18:41 +0000)
committer Faiyaz Ahmed <faiyaza@cs.princeton.edu>
Mon, 18 Feb 2008 18:41:34 +0000 (18:41 +0000)
pl_mom.spec
swapmon.py

index 1797426..e40f412 100644 (file)
@@ -4,8 +4,8 @@
 %define url $URL$
 
 %define name pl_mom
-%define version 2.21
-%define taglevel 2
+%define version 2.2
+%define taglevel 04
 
 %define release %{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}}
  
index dd246a2..5758108 100755 (executable)
@@ -626,10 +626,12 @@ def main():
                             slicemail(slice['name'], alarm_subject % params, 
                               alarm_body % params)
                 else:
-                    # Reset slice
-                    if not resetlist.has_key(slice['name']):
-                        resetlist[slice['name']] = Reset(slice['name'])
-                    resetlist[slice['name']].reset(params)
+                    # XXX HACK to kill zombied NM procs in slices
+                    if not killNM(slice):
+                        # Reset slice
+                        if not resetlist.has_key(slice['name']):
+                            resetlist[slice['name']] = Reset(slice['name'])
+                        resetlist[slice['name']].reset(params)
 
         # wait period vefore recalculating swap.  If in danger, recalc.
         if timer <= 0 or used >= (last_used + change_thresh):
@@ -647,5 +649,15 @@ def main():
 
     removepid("swapmon")
 
+
+def killNM(slice):
+    for proc in slice['procs']:
+        if proc['cmd'] == "/usr/bin/python /usr/share/NodeManager/nm.py -d -s":
+            print "Killing NM in slice %s" % slice['name']
+            bwlimit.run("kill -9 %s" % proc['pid'])
+            return True
+    return False
+
+
 if __name__ == '__main__':
     main()