X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=tools.py;h=02e2ab9b7193b17f8f9aef7e9e6ccd3f9f65215a;hb=9e6b9c1ea9e020c55c85b433bac47231d63e9ffd;hp=83f0327a7d5d3649f19e02fc68ea1ab23dd7eb5d;hpb=2bea6afe154924341f57f7b8633b9ca87b53b82f;p=nodemanager.git diff --git a/tools.py b/tools.py index 83f0327..02e2ab9 100644 --- a/tools.py +++ b/tools.py @@ -1,17 +1,45 @@ """A few things that didn't seem to fit anywhere else.""" -import cPickle -import errno -import os +import os, os.path import pwd import tempfile +import fcntl +import errno import threading +import subprocess +import shutil +import sys +import signal import logger +PID_FILE = '/var/run/nodemanager.pid' + +#################### +def get_default_if(): + interface = get_if_from_hwaddr(get_hwaddr_from_plnode()) + if not interface: interface = "eth0" + return interface -PID_FILE = '/var/run/node_mgr.pid' +def get_hwaddr_from_plnode(): + try: + for line in open("/usr/boot/plnode.txt", 'r').readlines(): + if line.startswith("NET_DEVICE"): + return line.split("=")[1].strip().strip('"') + except: + pass + return None +def get_if_from_hwaddr(hwaddr): + import sioc + devs = sioc.gifconf() + for dev in devs: + dev_hwaddr = sioc.gifhwaddr(dev) + if dev_hwaddr == hwaddr: return dev + return None + +#################### +# daemonizing def as_daemon_thread(run): """Call function with no arguments in its own thread.""" thr = threading.Thread(target=run) @@ -32,13 +60,13 @@ def daemon(): os.setsid() if os.fork() != 0: os._exit(0) os.chdir('/') - os.umask(0) + os.umask(0022) devnull = os.open(os.devnull, os.O_RDWR) - for fd in range(3): os.dup2(devnull, fd) - -def deepcopy(obj): - """Return a deep copy of obj.""" - return cPickle.loads(cPickle.dumps(obj, -1)) + os.dup2(devnull, 0) + # xxx fixme - this is just to make sure that nothing gets stupidly lost - should use devnull + crashlog = os.open('/var/log/nodemanager.daemon', os.O_RDWR | os.O_APPEND | os.O_CREAT, 0644) + os.dup2(crashlog, 1) + os.dup2(crashlog, 2) def fork_as(su, function, *args): """fork(), cd / to avoid keeping unused directories open, close all nonstandard file descriptors (to avoid capturing open sockets), fork() again (to avoid zombies) and call with arguments in the grandchild process. If is not None, set our group and user ids appropriately in the child process.""" @@ -47,20 +75,25 @@ def fork_as(su, function, *args): try: os.chdir('/') close_nonstandard_fds() - pw_ent = pwd.getpwnam(su) - os.setegid(pw_ent[3]) - os.seteuid(pw_ent[2]) + if su: + pw_ent = pwd.getpwnam(su) + os.setegid(pw_ent[3]) + os.seteuid(pw_ent[2]) child_pid = os.fork() if child_pid == 0: function(*args) except: os.seteuid(os.getuid()) # undo su so we can write the log file os.setegid(os.getgid()) - logger.log_exc() + logger.log_exc("tools: fork_as") os._exit(0) else: os.waitpid(child_pid, 0) +#################### +# manage files def pid_file(): - """We use a pid file to ensure that only one copy of NM is running at a given time. If successful, this function will write a pid file containing the pid of the current process. The return value is the pid of the other running process, or None otherwise.""" + """We use a pid file to ensure that only one copy of NM is running at a given time. +If successful, this function will write a pid file containing the pid of the current process. +The return value is the pid of the other running process, or None otherwise.""" other_pid = None if os.access(PID_FILE, os.F_OK): # check for a pid file handle = open(PID_FILE) # pid file exists, read it @@ -76,13 +109,241 @@ def pid_file(): write_file(PID_FILE, lambda f: f.write(str(os.getpid()))) return other_pid -def write_file(filename, do_write): +def write_file(filename, do_write, **kw_args): """Write file atomically by opening a temporary file, using to write that file, and then renaming the temporary file.""" - os.rename(write_temp_file(do_write), filename) + shutil.move(write_temp_file(do_write, **kw_args), filename) -def write_temp_file(do_write): +def write_temp_file(do_write, mode=None, uidgid=None): fd, temporary_filename = tempfile.mkstemp() + if mode: os.chmod(temporary_filename, mode) + if uidgid: os.chown(temporary_filename, *uidgid) f = os.fdopen(fd, 'w') try: do_write(f) finally: f.close() return temporary_filename + +# replace a target file with a new contents - checks for changes +# can handle chmod if requested +# can also remove resulting file if contents are void, if requested +# performs atomically: +# writes in a tmp file, which is then renamed (from sliverauth originally) +# returns True if a change occurred, or the file is deleted +def replace_file_with_string (target, new_contents, chmod=None, remove_if_empty=False): + try: + current=file(target).read() + except: + current="" + if current==new_contents: + # if turns out to be an empty string, and remove_if_empty is set, + # then make sure to trash the file if it exists + if remove_if_empty and not new_contents and os.path.isfile(target): + logger.verbose("tools.replace_file_with_string: removing file %s"%target) + try: os.unlink(target) + finally: return True + return False + # overwrite target file: create a temp in the same directory + path=os.path.dirname(target) or '.' + fd, name = tempfile.mkstemp('','repl',path) + os.write(fd,new_contents) + os.close(fd) + if os.path.exists(target): + os.unlink(target) + shutil.move(name,target) + if chmod: os.chmod(target,chmod) + return True + + +#################### +# utilities functions to get (cached) information from the node + +# get node_id from /etc/planetlab/node_id and cache it +_node_id=None +def node_id(): + global _node_id + if _node_id is None: + try: + _node_id=int(file("/etc/planetlab/node_id").read()) + except: + _node_id="" + return _node_id + +_root_context_arch=None +def root_context_arch(): + global _root_context_arch + if not _root_context_arch: + sp=subprocess.Popen(["uname","-i"],stdout=subprocess.PIPE) + (_root_context_arch,_)=sp.communicate() + _root_context_arch=_root_context_arch.strip() + return _root_context_arch + + +#################### +class NMLock: + def __init__(self, file): + logger.log("tools: Lock %s initialized." % file, 2) + self.fd = os.open(file, os.O_RDWR|os.O_CREAT, 0600) + flags = fcntl.fcntl(self.fd, fcntl.F_GETFD) + flags |= fcntl.FD_CLOEXEC + fcntl.fcntl(self.fd, fcntl.F_SETFD, flags) + def __del__(self): + os.close(self.fd) + def acquire(self): + logger.log("tools: Lock acquired.", 2) + fcntl.lockf(self.fd, fcntl.LOCK_SH) + def release(self): + logger.log("tools: Lock released.", 2) + fcntl.lockf(self.fd, fcntl.LOCK_UN) + +#################### +# Utilities for getting the IP address of a LXC/Openvswitch slice. Do this by +# running ifconfig inside of the slice's context. + +def get_sliver_process(slice_name, process_cmdline): + """ Utility function to find a process inside of an LXC sliver. Returns + (cgroup_fn, pid). cgroup_fn is the filename of the cgroup file for + the process, for example /proc/2592/cgroup. Pid is the process id of + the process. If the process is not found then (None, None) is returned. + """ + try: + cmd = 'grep %s /proc/*/cgroup | grep freezer'%slice_name + output = os.popen(cmd).readlines() + except: + # the slice couldn't be found + logger.log("get_sliver_process: couldn't find slice %s" % slice_name) + return (None, None) + + cgroup_fn = None + pid = None + for e in output: + try: + l = e.rstrip() + path = l.split(':')[0] + comp = l.rsplit(':')[-1] + slice_name_check = comp.rsplit('/')[-1] + + if (slice_name_check == slice_name): + slice_path = path + pid = slice_path.split('/')[2] + cmdline = open('/proc/%s/cmdline'%pid).read().rstrip('\n\x00') + if (cmdline == process_cmdline): + cgroup_fn = slice_path + break + except: + break + + if (not cgroup_fn) or (not pid): + logger.log("get_sliver_process: process %s not running in slice %s" % (process_cmdline, slice_name)) + return (None, None) + + return (cgroup_fn, pid) + +def get_sliver_ifconfig(slice_name, device="eth0"): + """ return the output of "ifconfig" run from inside the sliver. + + side effects: adds "/usr/sbin" to sys.path + """ + + # See if setns is installed. If it's not then we're probably not running + # LXC. + if not os.path.exists("/usr/sbin/setns.so"): + return None + + # setns is part of lxcsu and is installed to /usr/sbin + if not "/usr/sbin" in sys.path: + sys.path.append("/usr/sbin") + import setns + + (cgroup_fn, pid) = get_sliver_process(slice_name, "/sbin/init") + if (not cgroup_fn) or (not pid): + return None + + path = '/proc/%s/ns/net'%pid + + result = None + try: + setns.chcontext(path) + + args = ["/sbin/ifconfig", device] + sub = subprocess.Popen(args, stderr = subprocess.PIPE, stdout = subprocess.PIPE) + sub.wait() + + if (sub.returncode != 0): + logger.log("get_slice_ifconfig: error in ifconfig: %s" % sub.stderr.read()) + + result = sub.stdout.read() + finally: + setns.chcontext("/proc/1/ns/net") + + return result + +def get_sliver_ip(slice_name): + ifconfig = get_sliver_ifconfig(slice_name) + if not ifconfig: + return None + + for line in ifconfig.split("\n"): + if "inet addr:" in line: + # example: ' inet addr:192.168.122.189 Bcast:192.168.122.255 Mask:255.255.255.0' + parts = line.strip().split() + if len(parts)>=2 and parts[1].startswith("addr:"): + return parts[1].split(":")[1] + + return None + +### this returns the kind of virtualization on the node +# either 'vs' or 'lxc' +# also caches it in /etc/planetlab/virt for next calls +# could be promoted to core nm if need be +virt_stamp="/etc/planetlab/virt" +def get_node_virt (): + try: + return file(virt_stamp).read().strip() + except: + pass + logger.log("Computing virt..") + try: + if subprocess.call ([ 'vserver', '--help' ]) ==0: virt='vs' + else: virt='lxc' + except: + virt='lxc' + with file(virt_stamp,"w") as f: + f.write(virt) + return virt + +### this return True or False to indicate that systemctl is present on that box +# cache result in memory as _has_systemctl +_has_systemctl=None +def has_systemctl (): + global _has_systemctl + if _has_systemctl is None: + _has_systemctl = (subprocess.call([ 'systemctl', '--help' ]) == 0) + return _has_systemctl + +# how to run a command in a slice +# now this is a painful matter +# the problem is with capsh that forces a bash command to be injected in its exec'ed command +# so because lxcsu uses capsh, you cannot exec anything else than bash +# bottom line is, what actually needs to be called is +# vs: vserver exec slicename command and its arguments +# lxc: lxcsu slicename "command and its arguments" +# which, OK, is no big deal as long as the command is simple enough, +# but do not stretch it with arguments that have spaces or need quoting as that will become a nightmare +def command_in_slice (slicename, argv): + virt=get_node_virt() + if virt=='vs': + return [ 'vserver', slicename, 'exec', ] + argv + elif virt=='lxc': + # wrap up argv in a single string for -c + return [ 'lxcsu', slicename, ] + [ " ".join(argv) ] + logger.log("command_in_slice: WARNING: could not find a valid virt") + return argv + +#################### +def init_signals (): + def handler (signum, frame): + logger.log("Received signal %d - exiting"%signum) + os._exit(1) + signal.signal(signal.SIGHUP,handler) + signal.signal(signal.SIGQUIT,handler) + signal.signal(signal.SIGINT,handler) + signal.signal(signal.SIGTERM,handler)