X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2Fnepi%2Fresources%2Flinux%2Fnode.py;h=ee7e2e1d17afbfb472a33eb76a50102cd5005292;hb=6285ca51026efb69642eea9dfc7c480e722d84a9;hp=0abbf8fea85ebf451be53c36a73271df3d3f4709;hpb=e96e370df463518cf451a8f8df9f9a93d2b94730;p=nepi.git diff --git a/src/nepi/resources/linux/node.py b/src/nepi/resources/linux/node.py index 0abbf8fe..ee7e2e1d 100644 --- a/src/nepi/resources/linux/node.py +++ b/src/nepi/resources/linux/node.py @@ -297,7 +297,7 @@ class LinuxNode(ResourceManager): if not self.localhost and not self.get("username"): msg = "Can't resolve OS, insufficient data " self.error(msg) - raise RuntimeError, msg + raise RuntimeError(msg) out = self.get_os() @@ -352,9 +352,10 @@ class LinuxNode(ResourceManager): def do_provision(self): # check if host is alive if not self.is_alive(): - msg = "Deploy failed. Unresponsive node {}".format(self.get("hostname")) + trace = traceback.format_exc() + msg = "Deploy failed. Unresponsive node {} -- traceback {}".format(self.get("hostname"), trace) self.error(msg) - raise RuntimeError, msg + raise RuntimeError(msg) self.find_home() @@ -439,18 +440,63 @@ class LinuxNode(ResourceManager): "sudo -S killall -u {} || /bin/true ; ".format(self.get("username"))) else: if self.state >= ResourceState.READY: + ######################## + #Collect all process (must change for a more intelligent way) + ppid = [] + pids = [] + avoid_pids = "ps axjf | awk '{print $1,$2}'" + (out, err), proc = self.execute(avoid_pids) + if len(out) != 0: + for line in out.strip().split("\n"): + parts = line.strip().split(" ") + ppid.append(parts[0]) + pids.append(parts[1]) + + #Collect all process below ssh -D + tree_owner = 0 + ssh_pids = [] + sshs = "ps aux | grep 'sshd' | awk '{print $2,$12}'" + (out, err), proc = self.execute(sshs) + if len(out) != 0: + for line in out.strip().split("\n"): + parts = line.strip().split(" ") + if parts[1].startswith('root@pts'): + ssh_pids.append(parts[0]) + elif parts[1] == "-D": + tree_owner = parts[0] + + avoid_kill = [] + temp = [] + #Search for the child process of the pid's collected at the first block. + for process in ssh_pids: + temp = self.search_for_child(process, pids, ppid) + avoid_kill = list(set(temp)) + + if len(avoid_kill) > 0: + avoid_kill.append(tree_owner) + ######################## + import pickle - pids = pickle.load(open("/tmp/save.proc", "rb")) + with open("/tmp/save.proc", "rb") as pickle_file: + pids = pickle.load(pickle_file) pids_temp = dict() - ps_aux = "ps aux |awk '{print $2,$11}'" + ps_aux = "ps aux | awk '{print $2,$11}'" (out, err), proc = self.execute(ps_aux) if len(out) != 0: for line in out.strip().split("\n"): parts = line.strip().split(" ") pids_temp[parts[0]] = parts[1] + # creates the difference between the machine pids freezed (pickle) and the actual + # adding the avoided pids filtered above (avoid_kill) to allow users keep process + # alive when using besides ssh connections kill_pids = set(pids_temp.items()) - set(pids.items()) kill_pids = ' '.join(dict(kill_pids).keys()) + # removing pids from beside connections and its process + kill_pids = kill_pids.split(' ') + kill_pids = list(set(kill_pids) - set(avoid_kill)) + kill_pids = ' '.join(kill_pids) + cmd = ("killall tcpdump || /bin/true ; " + "kill $(ps aux | grep '[.]nepi' | awk '{print $2}') || /bin/true ; " + "kill {} || /bin/true ; ".format(kill_pids)) @@ -463,12 +509,22 @@ class LinuxNode(ResourceManager): (out, err), proc = self.execute(cmd, retry = 1, with_lock = True) + def search_for_child(self, pid, pids, ppid, family=[]): + """ Recursive function to search for child. List A contains the pids and list B the parents (ppid) + """ + family.append(pid) + for key, value in enumerate(ppid): + if value == pid: + child = pids[key] + self.search_for_child(child, pids, ppid) + return family + def clean_home(self): """ Cleans all NEPI related folders in the Linux host """ self.info("Cleaning up home") - cmd = "cd {} ; find . -maxdepth 1 -name \.nepi -execdir rm -rf {} + "\ + cmd = "cd {} ; find . -maxdepth 1 -name \.nepi -execdir rm -rf {{}} + "\ .format(self.home_dir) return self.execute(cmd, with_lock = True) @@ -480,7 +536,7 @@ class LinuxNode(ResourceManager): """ self.info("Cleaning up experiment files") - cmd = "cd {} ; find . -maxdepth 1 -name '{}' -execdir rm -rf {} + "\ + cmd = "cd {} ; find . -maxdepth 1 -name '{}' -execdir rm -rf {{}} + "\ .format(self.exp_dir, self.ec.exp_id) return self.execute(cmd, with_lock = True) @@ -741,7 +797,7 @@ class LinuxNode(ResourceManager): msg = "{} out: {} err: {}".format(msg, out, err) if raise_on_error: - raise RuntimeError, msg + raise RuntimeError(msg) return ((out, err), proc) @@ -757,7 +813,7 @@ class LinuxNode(ResourceManager): self.error(msg, out, err) if raise_on_error: - raise RuntimeError, msg + raise RuntimeError(msg) return ((out, err), proc) @@ -770,7 +826,7 @@ class LinuxNode(ResourceManager): else: msg = "Error installing packages ( OS not known ) " self.error(msg, self.os) - raise RuntimeError, msg + raise RuntimeError(msg) return command @@ -811,7 +867,7 @@ class LinuxNode(ResourceManager): else: msg = "Error removing packages ( OS not known ) " self.error(msg) - raise RuntimeError, msg + raise RuntimeError(msg) run_home = run_home or home @@ -895,7 +951,7 @@ class LinuxNode(ResourceManager): msg = " Failed to run command '{}' ".format(command) self.error(msg, out, err) if raise_on_error: - raise RuntimeError, msg + raise RuntimeError(msg) # Wait for pid file to be generated pid, ppid = self.wait_pid( @@ -917,7 +973,7 @@ class LinuxNode(ResourceManager): self.error(msg, eout, err) if raise_on_error: - raise RuntimeError, msg + raise RuntimeError(msg) (out, oerr), proc = self.check_output(home, stdout) @@ -1032,7 +1088,7 @@ class LinuxNode(ResourceManager): self.error(msg) if raise_on_error: - raise RuntimeError, msg + raise RuntimeError(msg) return pid, ppid @@ -1111,7 +1167,7 @@ class LinuxNode(ResourceManager): if not self._home_dir: self.error(msg) - raise RuntimeError, msg + raise RuntimeError(msg) def filter_existing_files(self, src, dst): """ Removes files that already exist in the Linux host from src list