X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=tools.py;h=02e2ab9b7193b17f8f9aef7e9e6ccd3f9f65215a;hb=9e6b9c1ea9e020c55c85b433bac47231d63e9ffd;hp=49441bda94a42af81f0e922565d97f37dc826911;hpb=2f100cd2a78a0f91022bc4a895007975181cf353;p=nodemanager.git diff --git a/tools.py b/tools.py index 49441bd..02e2ab9 100644 --- a/tools.py +++ b/tools.py @@ -1,18 +1,45 @@ """A few things that didn't seem to fit anywhere else.""" -import cPickle -import errno -import os +import os, os.path import pwd import tempfile -import threading import fcntl +import errno +import threading +import subprocess +import shutil +import sys +import signal import logger +PID_FILE = '/var/run/nodemanager.pid' + +#################### +def get_default_if(): + interface = get_if_from_hwaddr(get_hwaddr_from_plnode()) + if not interface: interface = "eth0" + return interface -PID_FILE = '/var/run/nm.pid' +def get_hwaddr_from_plnode(): + try: + for line in open("/usr/boot/plnode.txt", 'r').readlines(): + if line.startswith("NET_DEVICE"): + return line.split("=")[1].strip().strip('"') + except: + pass + return None +def get_if_from_hwaddr(hwaddr): + import sioc + devs = sioc.gifconf() + for dev in devs: + dev_hwaddr = sioc.gifhwaddr(dev) + if dev_hwaddr == hwaddr: return dev + return None + +#################### +# daemonizing def as_daemon_thread(run): """Call function with no arguments in its own thread.""" thr = threading.Thread(target=run) @@ -33,11 +60,11 @@ def daemon(): os.setsid() if os.fork() != 0: os._exit(0) os.chdir('/') - os.umask(0) + os.umask(0022) devnull = os.open(os.devnull, os.O_RDWR) os.dup2(devnull, 0) # xxx fixme - this is just to make sure that nothing gets stupidly lost - should use devnull - crashlog = os.open('/var/log/nm.daemon', os.O_RDWR | os.O_APPEND | os.O_CREAT, 0644) + crashlog = os.open('/var/log/nodemanager.daemon', os.O_RDWR | os.O_APPEND | os.O_CREAT, 0644) os.dup2(crashlog, 1) os.dup2(crashlog, 2) @@ -57,12 +84,16 @@ def fork_as(su, function, *args): except: os.seteuid(os.getuid()) # undo su so we can write the log file os.setegid(os.getgid()) - logger.log_exc() + logger.log_exc("tools: fork_as") os._exit(0) else: os.waitpid(child_pid, 0) +#################### +# manage files def pid_file(): - """We use a pid file to ensure that only one copy of NM is running at a given time. If successful, this function will write a pid file containing the pid of the current process. The return value is the pid of the other running process, or None otherwise.""" + """We use a pid file to ensure that only one copy of NM is running at a given time. +If successful, this function will write a pid file containing the pid of the current process. +The return value is the pid of the other running process, or None otherwise.""" other_pid = None if os.access(PID_FILE, os.F_OK): # check for a pid file handle = open(PID_FILE) # pid file exists, read it @@ -80,7 +111,7 @@ def pid_file(): def write_file(filename, do_write, **kw_args): """Write file atomically by opening a temporary file, using to write that file, and then renaming the temporary file.""" - os.rename(write_temp_file(do_write, **kw_args), filename) + shutil.move(write_temp_file(do_write, **kw_args), filename) def write_temp_file(do_write, mode=None, uidgid=None): fd, temporary_filename = tempfile.mkstemp() @@ -91,9 +122,65 @@ def write_temp_file(do_write, mode=None, uidgid=None): finally: f.close() return temporary_filename +# replace a target file with a new contents - checks for changes +# can handle chmod if requested +# can also remove resulting file if contents are void, if requested +# performs atomically: +# writes in a tmp file, which is then renamed (from sliverauth originally) +# returns True if a change occurred, or the file is deleted +def replace_file_with_string (target, new_contents, chmod=None, remove_if_empty=False): + try: + current=file(target).read() + except: + current="" + if current==new_contents: + # if turns out to be an empty string, and remove_if_empty is set, + # then make sure to trash the file if it exists + if remove_if_empty and not new_contents and os.path.isfile(target): + logger.verbose("tools.replace_file_with_string: removing file %s"%target) + try: os.unlink(target) + finally: return True + return False + # overwrite target file: create a temp in the same directory + path=os.path.dirname(target) or '.' + fd, name = tempfile.mkstemp('','repl',path) + os.write(fd,new_contents) + os.close(fd) + if os.path.exists(target): + os.unlink(target) + shutil.move(name,target) + if chmod: os.chmod(target,chmod) + return True + + +#################### +# utilities functions to get (cached) information from the node + +# get node_id from /etc/planetlab/node_id and cache it +_node_id=None +def node_id(): + global _node_id + if _node_id is None: + try: + _node_id=int(file("/etc/planetlab/node_id").read()) + except: + _node_id="" + return _node_id + +_root_context_arch=None +def root_context_arch(): + global _root_context_arch + if not _root_context_arch: + sp=subprocess.Popen(["uname","-i"],stdout=subprocess.PIPE) + (_root_context_arch,_)=sp.communicate() + _root_context_arch=_root_context_arch.strip() + return _root_context_arch + +#################### class NMLock: def __init__(self, file): + logger.log("tools: Lock %s initialized." % file, 2) self.fd = os.open(file, os.O_RDWR|os.O_CREAT, 0600) flags = fcntl.fcntl(self.fd, fcntl.F_GETFD) flags |= fcntl.FD_CLOEXEC @@ -101,6 +188,162 @@ class NMLock: def __del__(self): os.close(self.fd) def acquire(self): - fcntl.lockf(self.fd, fcntl.LOCK_EX) + logger.log("tools: Lock acquired.", 2) + fcntl.lockf(self.fd, fcntl.LOCK_SH) def release(self): + logger.log("tools: Lock released.", 2) fcntl.lockf(self.fd, fcntl.LOCK_UN) + +#################### +# Utilities for getting the IP address of a LXC/Openvswitch slice. Do this by +# running ifconfig inside of the slice's context. + +def get_sliver_process(slice_name, process_cmdline): + """ Utility function to find a process inside of an LXC sliver. Returns + (cgroup_fn, pid). cgroup_fn is the filename of the cgroup file for + the process, for example /proc/2592/cgroup. Pid is the process id of + the process. If the process is not found then (None, None) is returned. + """ + try: + cmd = 'grep %s /proc/*/cgroup | grep freezer'%slice_name + output = os.popen(cmd).readlines() + except: + # the slice couldn't be found + logger.log("get_sliver_process: couldn't find slice %s" % slice_name) + return (None, None) + + cgroup_fn = None + pid = None + for e in output: + try: + l = e.rstrip() + path = l.split(':')[0] + comp = l.rsplit(':')[-1] + slice_name_check = comp.rsplit('/')[-1] + + if (slice_name_check == slice_name): + slice_path = path + pid = slice_path.split('/')[2] + cmdline = open('/proc/%s/cmdline'%pid).read().rstrip('\n\x00') + if (cmdline == process_cmdline): + cgroup_fn = slice_path + break + except: + break + + if (not cgroup_fn) or (not pid): + logger.log("get_sliver_process: process %s not running in slice %s" % (process_cmdline, slice_name)) + return (None, None) + + return (cgroup_fn, pid) + +def get_sliver_ifconfig(slice_name, device="eth0"): + """ return the output of "ifconfig" run from inside the sliver. + + side effects: adds "/usr/sbin" to sys.path + """ + + # See if setns is installed. If it's not then we're probably not running + # LXC. + if not os.path.exists("/usr/sbin/setns.so"): + return None + + # setns is part of lxcsu and is installed to /usr/sbin + if not "/usr/sbin" in sys.path: + sys.path.append("/usr/sbin") + import setns + + (cgroup_fn, pid) = get_sliver_process(slice_name, "/sbin/init") + if (not cgroup_fn) or (not pid): + return None + + path = '/proc/%s/ns/net'%pid + + result = None + try: + setns.chcontext(path) + + args = ["/sbin/ifconfig", device] + sub = subprocess.Popen(args, stderr = subprocess.PIPE, stdout = subprocess.PIPE) + sub.wait() + + if (sub.returncode != 0): + logger.log("get_slice_ifconfig: error in ifconfig: %s" % sub.stderr.read()) + + result = sub.stdout.read() + finally: + setns.chcontext("/proc/1/ns/net") + + return result + +def get_sliver_ip(slice_name): + ifconfig = get_sliver_ifconfig(slice_name) + if not ifconfig: + return None + + for line in ifconfig.split("\n"): + if "inet addr:" in line: + # example: ' inet addr:192.168.122.189 Bcast:192.168.122.255 Mask:255.255.255.0' + parts = line.strip().split() + if len(parts)>=2 and parts[1].startswith("addr:"): + return parts[1].split(":")[1] + + return None + +### this returns the kind of virtualization on the node +# either 'vs' or 'lxc' +# also caches it in /etc/planetlab/virt for next calls +# could be promoted to core nm if need be +virt_stamp="/etc/planetlab/virt" +def get_node_virt (): + try: + return file(virt_stamp).read().strip() + except: + pass + logger.log("Computing virt..") + try: + if subprocess.call ([ 'vserver', '--help' ]) ==0: virt='vs' + else: virt='lxc' + except: + virt='lxc' + with file(virt_stamp,"w") as f: + f.write(virt) + return virt + +### this return True or False to indicate that systemctl is present on that box +# cache result in memory as _has_systemctl +_has_systemctl=None +def has_systemctl (): + global _has_systemctl + if _has_systemctl is None: + _has_systemctl = (subprocess.call([ 'systemctl', '--help' ]) == 0) + return _has_systemctl + +# how to run a command in a slice +# now this is a painful matter +# the problem is with capsh that forces a bash command to be injected in its exec'ed command +# so because lxcsu uses capsh, you cannot exec anything else than bash +# bottom line is, what actually needs to be called is +# vs: vserver exec slicename command and its arguments +# lxc: lxcsu slicename "command and its arguments" +# which, OK, is no big deal as long as the command is simple enough, +# but do not stretch it with arguments that have spaces or need quoting as that will become a nightmare +def command_in_slice (slicename, argv): + virt=get_node_virt() + if virt=='vs': + return [ 'vserver', slicename, 'exec', ] + argv + elif virt=='lxc': + # wrap up argv in a single string for -c + return [ 'lxcsu', slicename, ] + [ " ".join(argv) ] + logger.log("command_in_slice: WARNING: could not find a valid virt") + return argv + +#################### +def init_signals (): + def handler (signum, frame): + logger.log("Received signal %d - exiting"%signum) + os._exit(1) + signal.signal(signal.SIGHUP,handler) + signal.signal(signal.SIGQUIT,handler) + signal.signal(signal.SIGINT,handler) + signal.signal(signal.SIGTERM,handler)