#!/usr/bin/env python3
#
# Provenance: introduced as support-scripts/cleanup-zombies.py by commit
# 19baba88753ccccac14940a590a456a19aafd783 (Thierry Parmentelat, 2015-10-29),
# "script for spotting and trashing zombie containers".
#
# node manager has a few working assumptions, like
# if a domain d does not exist, there is no /vservers/d
#
# this utility tries to detect and assess potentially
# conflicting situations that could prevent nodemanager
# from recovering properly
#
# the logic is simply to find zombie containers, i.e.
# VMs that do have a workdir in /vservers/
# but that are not reported as running by 'virsh list',
# which suggests they have been improperly trashed
#
# then we trash them, but for that some subdirs must be
# btrfs-subvolume-delete'd and not rm-rf'ed

import glob
import os
import os.path
import shlex
import subprocess
import sys
from argparse import ArgumentParser

# where the containers' working directories live
VSERVERS_ROOT = "/vservers"

# the prefixes used to locate subvolumes inside a container directory
FLAVOUR_PREFIXES = (
    'onelab-',
    'lxc-',
    'omf-',
)


def running_domains():
    """Return the domain names printed by 'virsh -c lxc:/// list --name'.

    Without --all or --inactive, 'virsh list' reports active domains only,
    so a container that is defined but stopped is not part of the result.

    Raises:
        subprocess.CalledProcessError: virsh exited with a non-zero status.
        OSError: virsh could not be started at all.

    Both are deliberately left uncaught: going on with an empty list would
    make every existing container look like a zombie.
    """
    command = [
        'virsh',
        '-c',
        'lxc:///',
        'list',
        '--name',
    ]
    names_string = subprocess.check_output(
        command,
        universal_newlines=True,
        stdin=subprocess.DEVNULL,
    )
    # virsh pads its output with empty lines; keep the actual names only
    stripped = (line.strip() for line in names_string.splitlines())
    return [name for name in stripped if name]


def existing_vservers(root=VSERVERS_ROOT):
    """Return the names of the directories found directly under root.

    Args:
        root: directory holding one subdirectory per container.

    Returns:
        A list of bare directory names (no leading path).  Plain files are
        skipped; a missing root yields an empty list, and entries whose
        name starts with a dot are not matched by the glob pattern.
    """
    pattern = os.path.join(root, "*")
    return [
        os.path.basename(path)
        for path in glob.glob(pattern)
        if os.path.isdir(path)
    ]


def _printable(command):
    """Render an argv list as a single shell-safe line."""
    return " ".join(shlex.quote(word) for word in command)


def display_or_run_commands(commands, run):
    """Either show the commands, or actually run them one after the other.

    Args:
        commands: a list of commands, each one being an argv-style list.
        run: when false, only print what should be run; when true, execute.

    Returns:
        The number of commands that failed, i.e. that exited with a
        non-zero status or could not be started.  Always 0 when run is
        false or when there is nothing to do.
    """
    if not commands:
        return 0

    if not run:
        print("========== You should run")
        for command in commands:
            print(_printable(command))
        return 0

    failures = 0
    for command in commands:
        print("Running {}".format(_printable(command)))
        try:
            retcod = subprocess.call(command)
        except OSError as exc:
            # e.g. the tool is not installed; keep going with the
            # remaining commands, like we do for a non-zero exit
            print("Warning: could not run {}: {}".format(command[0], exc))
            failures += 1
            continue
        if retcod != 0:
            print("Warning: failed with retcod = {}".format(retcod))
            failures += 1
    return failures


def main(argv=None):
    """Spot zombie containers, then display or run the cleanup commands.

    Args:
        argv: command-line arguments; None means use sys.argv[1:].

    Returns:
        0 when every command that was run succeeded (or when nothing was
        run), 1 otherwise.
    """
    parser = ArgumentParser(
        description="spot and trash containers that have a directory in "
                    "{} but are not running".format(VSERVERS_ROOT))
    # the default is to cowardly show commands to run
    # use --run to actually do it
    parser.add_argument(
        "-r", "--run", action='store_true', default=False,
        help="actually run the commands instead of just displaying them")
    args = parser.parse_args(argv)

    running_containers = set(running_domains())
    existing_containers = set(existing_vservers())
    # sorted so that the output is the same from one run to the next
    zombies_containers = sorted(existing_containers - running_containers)

    failures = 0

    # we need to call 'btrfs subvolume delete' on these remainings
    # instead of just 'rm'
    if zombies_containers:
        zombie_dirs = [os.path.join(VSERVERS_ROOT, z)
                       for z in zombies_containers]
        print("-------- Found {} existing, but not running, containers"
              .format(len(zombies_containers)))
        print("zombie_dirs='{}'".format(" ".join(zombie_dirs)))
        subvolumes = [
            path
            for zombie_dir in zombie_dirs
            for prefix in FLAVOUR_PREFIXES
            for path in sorted(glob.glob(os.path.join(zombie_dir,
                                                      prefix + "*")))
        ]
        if subvolumes:
            print("zombie_subvolumes='{}'".format(" ".join(subvolumes)))
        # inner subvolumes first, then the container directories themselves
        commands = [['btrfs', 'subvolume', 'delete', path]
                    for path in subvolumes + zombie_dirs]
        failures += display_or_run_commands(commands, args.run)
        # find the containers dirs that might still exist
        leftovers = [path for path in zombie_dirs if os.path.isdir(path)]
        commands = [['rm', '-rf', path] for path in leftovers]
        failures += display_or_run_commands(commands, args.run)

    #### should happen much less frequently
    weirdos_containers = sorted(running_containers - existing_containers)
    if weirdos_containers:
        print("-------- Found {} running but non existing"
              .format(len(weirdos_containers)))
        for w in weirdos_containers:
            print(os.path.join(VSERVERS_ROOT, w))

    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())