X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=accounts.py;h=59ecedb01d8f019dc3321f8f7814cb5de5c3d1b2;hb=c9c814b39e0bcb6a20ebbe617a60f2ee6930c180;hp=b22a4ba95304f8227fe560e5fc7649bcd99c4a34;hpb=aac3e5d7c3443d6e1cb33525aefad35be5fe077a;p=nodemanager.git diff --git a/accounts.py b/accounts.py index b22a4ba..59ecedb 100644 --- a/accounts.py +++ b/accounts.py @@ -1,109 +1,193 @@ -import Queue +# $Id$ +# $URL$ + +"""Functionality common to all account classes. + +Each subclass of Account must provide five methods: + (*) create() and destroy(), which are static; + (*) configure(), start(), and stop(), which are not. + +configure(), which takes a record as its only argument, does +things like set up ssh keys. In addition, an Account subclass must +provide static member variables SHELL, which contains the unique shell +that it uses; and TYPE, a string that is used by the account creation +code. For no particular reason, TYPE is divided hierarchically by +periods; at the moment the only convention is that all sliver accounts +have type that begins with sliver. + +There are any number of race conditions that may result from the fact +that account names are not unique over time. Moreover, it's a bad +idea to perform lengthy operations while holding the database lock. +In order to deal with both of these problems, we use a worker thread +for each account name that ever exists. On 32-bit systems with large +numbers of accounts, this may cause the NM process to run out of +*virtual* memory! This problem may be remedied by decreasing the +maximum stack size. +""" + import os -import pwd +import pwd, grp import threading import logger import tools -_name_worker_lock = threading.Lock() -_name_worker = {} +# shell path -> account class association +shell_acct_class = {} +# account type -> account class association +type_acct_class = {} -def all(): - pw_ents = pwd.getpwall() - for pw_ent in pw_ents: - if pw_ent[6] in acct_class_by_shell: - yield acct_class_by_shell[pw_ent[6]].TYPE, pw_ent[0] +# these semaphores are acquired before creating/destroying an account +create_sem = threading.Semaphore(1) +destroy_sem = threading.Semaphore(1) -def get(name): - _name_worker_lock.acquire() - try: - if name not in _name_worker: _name_worker[name] = Worker(name) - return _name_worker[name] - finally: _name_worker_lock.release() +def register_class(acct_class): + """Call once for each account class. This method adds the class +to the dictionaries used to look up account classes by shell and +type.""" + shell_acct_class[acct_class.SHELL] = acct_class + type_acct_class[acct_class.TYPE] = acct_class -def install_ssh_keys(rec): - """Write to 's authorized_keys file.""" - dot_ssh = '/home/%s/.ssh' % rec['name'] - def do_installation(): - if not os.access(dot_ssh, os.F_OK): os.mkdir(dot_ssh) - tools.write_file(dot_ssh + '/authorized_keys', - lambda thefile: thefile.write(rec['ssh_keys'])) - logger.log('%s: installing ssh keys' % rec['name']) - tools.fork_as(rec['name'], do_installation) +# private account name -> worker object association and associated lock +name_worker_lock = threading.Lock() +name_worker = {} +def allpwents(): + return [pw_ent for pw_ent in pwd.getpwall() if pw_ent[6] in shell_acct_class] -TYPES = [] -acct_class_by_shell = {} -acct_class_by_type = {} - -def register_account_type(acct_class): - TYPES.append(acct_class.TYPE) - acct_class_by_shell[acct_class.SHELL] = acct_class - acct_class_by_type[acct_class.TYPE] = acct_class +def all(): + """Return the names of all accounts on the system with recognized shells.""" + return [pw_ent[0] for pw_ent in allpwents()] +def get(name): + """Return the worker object for a particular username. If no such object exists, create it first.""" + name_worker_lock.acquire() + try: + if name not in name_worker: name_worker[name] = Worker(name) + return name_worker[name] + finally: name_worker_lock.release() + + +class Account: + def __init__(self, rec): + logger.verbose('accounts: Initing account %s'%rec['name']) + self.name = rec['name'] + self.keys = '' + self.configure(rec) + + @staticmethod + def create(name, vref = None): abstract + + @staticmethod + def destroy(name): abstract + + def configure(self, rec): + """Write to my authorized_keys file.""" + logger.verbose('accounts: configuring %s'%self.name) + new_keys = rec['keys'] + if new_keys != self.keys: + # get the unix account info + gid = grp.getgrnam("slices")[2] + pw_info = pwd.getpwnam(self.name) + uid = pw_info[2] + pw_dir = pw_info[5] + + # write out authorized_keys file and conditionally create + # the .ssh subdir if need be. + dot_ssh = os.path.join(pw_dir,'.ssh') + if not os.path.isdir(dot_ssh): + if not os.path.isdir(pw_dir): + logger.verbose('accounts: WARNING: homedir %s does not exist for %s!'%(pw_dir,self.name)) + os.mkdir(pw_dir) + os.chown(pw_dir, uid, gid) + os.mkdir(dot_ssh) + + auth_keys = os.path.join(dot_ssh,'authorized_keys') + tools.write_file(auth_keys, lambda f: f.write(new_keys)) + + # set access permissions and ownership properly + os.chmod(dot_ssh, 0700) + os.chown(dot_ssh, uid, gid) + os.chmod(auth_keys, 0600) + os.chown(auth_keys, uid, gid) + + # set self.keys to new_keys only when all of the above ops succeed + self.keys = new_keys + + logger.log('accounts: %s: installed ssh keys' % self.name) + + def start(self, delay=0): pass + def stop(self): pass + def is_running(self): pass class Worker: - # these semaphores are acquired before creating/destroying an account - _create_sem = threading.Semaphore(1) - _destroy_sem = threading.Semaphore(1) def __init__(self, name): - self.name = name - self._acct = None - self._q = Queue.Queue() - tools.as_daemon_thread(self._run) + self.name = name # username + self._acct = None # the account object currently associated with this worker def ensure_created(self, rec): - self._q.put((self._ensure_created, tools.deepcopy(rec))) - - def _ensure_created(self, rec): + """Check account type is still valid. If not, recreate sliver. +If still valid, check if running and configure/start if not.""" + logger.log_data_in_file(rec,"/var/lib/nodemanager/%s.rec.txt"%rec['name'], + 'raw rec captured in ensure_created',logger.LOG_VERBOSE) curr_class = self._get_class() - next_class = acct_class_by_type[rec['account_type']] + next_class = type_acct_class[rec['type']] if next_class != curr_class: self._destroy(curr_class) - self._create_sem.acquire() - try: next_class.create(self.name) - finally: self._create_sem.release() - self._make_acct_obj() + create_sem.acquire() + try: next_class.create(self.name, rec['vref']) + finally: create_sem.release() + if not isinstance(self._acct, next_class): self._acct = next_class(rec) + logger.verbose("accounts.ensure_created: %s, running=%r"%(self.name,self.is_running())) + + # reservation_alive is set on reervable nodes, and its value is a boolean + if 'reservation_alive' in rec: + # reservable nodes + if rec['reservation_alive']: + # this sliver has the lease, it is safe to start it + if not self.is_running(): self.start(rec) + else: self.configure(rec) + else: + # not having the lease, do not start it + self.configure(rec) + # usual nodes - preserve old code + # xxx it's not clear what to do when a sliver changes type/class + # in a reservable node + else: + if not self.is_running() or next_class != curr_class: + self.start(rec) + else: self.configure(rec) + + def ensure_destroyed(self): self._destroy(self._get_class()) + + def start(self, rec, d = 0): self._acct.configure(rec) - if next_class != curr_class: self._acct.start() + self._acct.start(delay=d) - def ensure_destroyed(self): self._q.put((self._ensure_destroyed,)) - def _ensure_destroyed(self): self._destroy(self._get_class()) + def configure(self, rec): + self._acct.configure(rec) - def start(self): self._q.put((self._start,)) - def _start(self): - self._make_acct_obj() - self._acct.start() + def stop(self): self._acct.stop() - def stop(self): self._q.put((self._stop,)) - def _stop(self): - self._make_acct_obj() - self._acct.stop() + def is_running(self): + if (self._acct != None) and self._acct.is_running(): + status = True + else: + status = False + logger.verbose("accounts: Worker(%s): is not running" % self.name) + return status def _destroy(self, curr_class): self._acct = None if curr_class: - self._destroy_sem.acquire() + destroy_sem.acquire() try: curr_class.destroy(self.name) - finally: self._destroy_sem.release() + finally: destroy_sem.release() def _get_class(self): try: shell = pwd.getpwnam(self.name)[6] except KeyError: return None - return acct_class_by_shell[shell] - - def _make_acct_obj(self): - curr_class = self._get_class() - if not isinstance(self._acct, curr_class): - self._acct = curr_class(self.name) - - def _run(self): - while True: - try: - cmd = self._q.get() - cmd[0](*cmd[1:]) - except: logger.log_exc() + return shell_acct_class[shell]