#!/usr/bin/env python3

# node manager has a few working assumptions, like
# if a domain d does not exist, there is no /vservers/d
#
# this utility tries to detect and assess potentially
# conflicting situations, that could prevent nodemanager
# from recovering properly
#
# the logic is simply to find zombie containers, i.e.
# VMs that do have a workdir in /vservers/<name>
# but that are not reported as running by 'virsh list',
# which suggests they have been improperly trashed
#
# then we trash them, but for that some subdirs must be
# btrfs-subvolume-delete'd and not rm-rf'ed

import subprocess
import glob
import os
import os.path
from argparse import ArgumentParser

# directory under which each container has its working directory
VSERVERS_ROOT = "/vservers"

# the prefixes used to locate subvolumes inside a container directory
FLAVOUR_PREFIXES = ('onelab-', 'lxc-', 'omf-')


def running_domains():
    """
    Return the names reported by 'virsh -c lxc:/// list --name'.

    The command output is split into lines; blank lines are dropped.
    Raises subprocess.CalledProcessError if virsh exits with a non-zero
    status, which deliberately aborts the script: without a reliable list
    of running domains, every container would look like a zombie.
    """
    command = ['virsh', '-c', 'lxc:///', 'list', '--name']
    output = subprocess.check_output(
        command,
        universal_newlines=True,
        stdin=subprocess.DEVNULL,
    )
    stripped = (line.strip() for line in output.splitlines())
    return [name for name in stripped if name]


def existing_vservers(root=VSERVERS_ROOT):
    """
    Return the names of the directories found directly under `root`.

    Entries that are not directories are ignored, and so are names
    starting with a dot (they are not matched by the '*' glob pattern).
    A missing `root` yields an empty list.
    """
    pattern = os.path.join(glob.escape(root), "*")
    return [
        os.path.basename(path)
        for path in glob.glob(pattern)
        if os.path.isdir(path)
    ]


def display_or_run_commands(commands, run):
    """
    Either print `commands` (when `run` is false) or execute them.

    `commands` is a sequence of argv-style lists of strings. Nothing at
    all is printed when it is empty. When running, a command that exits
    with a non-zero status only triggers a warning; the remaining
    commands are still attempted.
    """
    if not commands:
        return

    if not run:
        print("========== You should run")
        for command in commands:
            print(" ".join(command))
        return

    for command in commands:
        print("Running {}".format(" ".join(command)))
        retcod = subprocess.call(command)
        if retcod != 0:
            print("Warning: failed with retcod = {}".format(retcod))


def _zombie_subvolumes(zombie_dirs, prefixes=FLAVOUR_PREFIXES):
    """
    Return the paths inside `zombie_dirs` whose name starts with one of `prefixes`.
    """
    return [
        path
        for zombie_dir in zombie_dirs
        for prefix in prefixes
        # escape the directory part so that an unusual container name
        # cannot be interpreted as a glob pattern
        for path in sorted(glob.glob(
            "{}/{}*".format(glob.escape(zombie_dir), prefix)))
    ]


def main():
    """
    Report containers whose directory and running state disagree.

    Containers that have a directory but are not running get their
    deletion commands displayed, or executed when --run is given.
    Containers that are running but have no directory are only listed.
    """
    parser = ArgumentParser()
    # the default is to cowardly show commands to run
    # use --run to actually do it
    parser.add_argument("-r", "--run", action='store_true', default=False)
    args = parser.parse_args()

    running_containers = set(running_domains())
    existing_containers = set(existing_vservers())

    # sorted so that the report and the order of commands are reproducible
    zombies_containers = sorted(existing_containers - running_containers)

    if zombies_containers:
        zombie_dirs = [VSERVERS_ROOT + "/" + z for z in zombies_containers]
        print("-------- Found {} existing, but not running, containers".format(len(zombies_containers)))
        print("zombie_dirs='{}'".format(" ".join(zombie_dirs)))

        subvolumes = _zombie_subvolumes(zombie_dirs)
        if subvolumes:
            print("zombie_subvolumes='{}'".format(" ".join(subvolumes)))

        # we need to call 'btrfs subvolume delete' on these remainings
        # instead of just 'rm'; inner subvolumes go first, then the
        # container directories themselves
        commands = [
            ['btrfs', 'subvolume', 'delete', path]
            for path in subvolumes + zombie_dirs
        ]
        display_or_run_commands(commands, args.run)

        # find the containers dirs that might still exist
        leftovers = [path for path in zombie_dirs if os.path.isdir(path)]
        commands = [['rm', '-rf', path] for path in leftovers]
        display_or_run_commands(commands, args.run)

    #### should happen much less frequently
    weirdos_containers = sorted(running_containers - existing_containers)
    if weirdos_containers:
        print("-------- Found {} running but non existing".format(len(weirdos_containers)))
        for w in weirdos_containers:
            print("/vservers/{}".format(w))


if __name__ == "__main__":
    main()