prettyfied - spaces
[sfa.git] / sfa / importer / plimporter.py
index 1e04e54..5d7fe9d 100644 (file)
@@ -24,7 +24,9 @@ from sfa.util.xrn import Xrn, get_leaf, get_authority, hrn_to_urn
 from sfa.trust.gid import create_uuid    
 from sfa.trust.certificate import convert_public_key, Keypair
 
-from sfa.storage.alchemy import dbsession
+# using global alchemy.session() here is fine 
+# as importer is on standalone one-shot process
+from sfa.storage.alchemy import global_dbsession
 from sfa.storage.model import RegRecord, RegAuthority, RegSlice, RegNode, RegUser, RegKey
 
 from sfa.planetlab.plshell import PlShell    
@@ -45,7 +47,7 @@ class PlImporter:
 
     def __init__ (self, auth_hierarchy, logger):
         self.auth_hierarchy = auth_hierarchy
-        self.logger=logger
+        self.logger = logger
 
     def add_options (self, parser):
         # we don't have any options for now
@@ -57,7 +59,7 @@ class PlImporter:
     def remember_record_by_hrn (self, record):
         tuple = (record.type, record.hrn)
         if tuple in self.records_by_type_hrn:
-            self.logger.warning ("PlImporter.remember_record_by_hrn: duplicate (%s,%s)"%tuple)
+            self.logger.warning ("PlImporter.remember_record_by_hrn: duplicate {}".format(tuple))
             return
         self.records_by_type_hrn [ tuple ] = record
 
@@ -68,7 +70,7 @@ class PlImporter:
             return
         tuple = (record.type, record.pointer)
         if tuple in self.records_by_type_pointer:
-            self.logger.warning ("PlImporter.remember_record_by_pointer: duplicate (%s,%s)"%tuple)
+            self.logger.warning ("PlImporter.remember_record_by_pointer: duplicate {}".format(tuple))
             return
         self.records_by_type_pointer [ ( record.type, record.pointer,) ] = record
 
@@ -115,9 +117,9 @@ class PlImporter:
                                            pointer=site['site_id'],
                                            authority=get_authority(site_hrn))
                 auth_record.just_created()
-                dbsession.add(auth_record)
-                dbsession.commit()
-                self.logger.info("PlImporter: Imported authority (vini site) %s"%auth_record)
+                global_dbsession.add(auth_record)
+                global_dbsession.commit()
+                self.logger.info("PlImporter: Imported authority (vini site) {}".format(auth_record))
                 self.remember_record ( site_record )
 
     def run (self, options):
@@ -127,7 +129,7 @@ class PlImporter:
         shell = PlShell (config)
 
         ######## retrieve all existing SFA objects
-        all_records = dbsession.query(RegRecord).all()
+        all_records = global_dbsession.query(RegRecord).all()
 
         # create hash by (type,hrn) 
         # we essentially use this to know if a given record is already known to SFA 
@@ -139,20 +141,24 @@ class PlImporter:
                      if record.pointer != -1] )
 
         # initialize record.stale to True by default, then mark stale=False on the ones that are in use
-        for record in all_records: record.stale=True
+        for record in all_records:
+            record.stale = True
 
         ######## retrieve PLC data
         # Get all plc sites
         # retrieve only required stuf
         sites = shell.GetSites({'peer_id': None, 'enabled' : True},
-                               ['site_id','login_base','node_ids','slice_ids','person_ids',])
+                               ['site_id','login_base','node_ids','slice_ids','person_ids', 'name', 'hrn'])
         # create a hash of sites by login_base
 #        sites_by_login_base = dict ( [ ( site['login_base'], site ) for site in sites ] )
         # Get all plc users
         persons = shell.GetPersons({'peer_id': None, 'enabled': True}, 
-                                   ['person_id', 'email', 'key_ids', 'site_ids', 'role_ids'])
+                                   ['person_id', 'email', 'key_ids', 'site_ids', 'role_ids', 'hrn'])
         # create a hash of persons by person_id
         persons_by_id = dict ( [ ( person['person_id'], person) for person in persons ] )
+        # also gather non-enabled user accounts so as to issue relevant warnings
+        disabled_persons = shell.GetPersons({'peer_id': None, 'enabled': False}, ['person_id'])
+        disabled_person_ids = [ person['person_id'] for person in disabled_persons ] 
         # Get all plc public keys
         # accumulate key ids for keys retrieval
         key_ids = []
@@ -167,28 +173,44 @@ class PlImporter:
         for person in persons:
             pubkeys = []
             for key_id in person['key_ids']:
-                key = keys_by_id[key_id]
-                if key['key_type'] == 'ssh': 
+                # by construction all the keys we fetched are ssh keys
+                # so gpg keys won't be in there
+                try:
+                    key = keys_by_id[key_id]
                     pubkeys.append(key)
+                except:
+                    self.logger.warning("Could not spot key {} - probably non-ssh".format(key_id))
             keys_by_person_id[person['person_id']] = pubkeys
         # Get all plc nodes  
         nodes = shell.GetNodes( {'peer_id': None}, ['node_id', 'hostname', 'site_id'])
         # create hash by node_id
         nodes_by_id = dict ( [ ( node['node_id'], node, ) for node in nodes ] )
         # Get all plc slices
-        slices = shell.GetSlices( {'peer_id': None}, ['slice_id', 'name', 'person_ids'])
+        slices = shell.GetSlices( {'peer_id': None}, ['slice_id', 'name', 'person_ids', 'hrn'])
         # create hash by slice_id
         slices_by_id = dict ( [ (slice['slice_id'], slice ) for slice in slices ] )
 
         # isolate special vini case in separate method
         self.create_special_vini_record (interface_hrn)
 
+        # Get top authority record
+        top_auth_record = self.locate_by_type_hrn ('authority', root_auth)
+        admins = []
+
         # start importing 
         for site in sites:
-            site_hrn = _get_site_hrn(interface_hrn, site)
+            try:
+               site_sfa_created = shell.GetSiteSfaCreated(site['site_id'])
+            except: 
+               site_sfa_created = None
+            if site['name'].startswith('sfa:') or site_sfa_created == 'True':
+                continue
+
+            #site_hrn = _get_site_hrn(interface_hrn, site)
+            site_hrn = site['hrn']
             # import if hrn is not in list of existing hrns or if the hrn exists
             # but its not a site record
-            site_record=self.locate_by_type_hrn ('authority', site_hrn)
+            site_record = self.locate_by_type_hrn ('authority', site_hrn)
             if not site_record:
                 try:
                     urn = hrn_to_urn(site_hrn, 'authority')
@@ -197,28 +219,32 @@ class PlImporter:
                     auth_info = self.auth_hierarchy.get_auth_info(urn)
                     site_record = RegAuthority(hrn=site_hrn, gid=auth_info.get_gid_object(),
                                                pointer=site['site_id'],
-                                               authority=get_authority(site_hrn))
+                                               authority=get_authority(site_hrn),
+                                               name=site['name'])
                     site_record.just_created()
-                    dbsession.add(site_record)
-                    dbsession.commit()
-                    self.logger.info("PlImporter: imported authority (site) : %s" % site_record) 
-                    self.remember_record (site_record)
+                    global_dbsession.add(site_record)
+                    global_dbsession.commit()
+                    self.logger.info("PlImporter: imported authority (site) : {}".format(site_record))
+                    self.remember_record(site_record)
                 except:
                     # if the site import fails then there is no point in trying to import the
                     # site's child records (node, slices, persons), so skip them.
-                    self.logger.log_exc("PlImporter: failed to import site. Skipping child records") 
+                    self.logger.log_exc("PlImporter: failed to import site {}. Skipping child records"\
+                                        .format(site_hrn))
                     continue 
             else:
                 # xxx update the record ...
+                site_record.name = site['name']
                 pass
-            site_record.stale=False
+            site_record.stale = False
              
             # import node records
             for node_id in site['node_ids']:
                 try:
                     node = nodes_by_id[node_id]
                 except:
-                    self.logger.warning ("PlImporter: cannot find node_id %s - ignored"%node_id)
+                    self.logger.warning ("PlImporter: cannot find node_id {} - ignored"
+                                         .format(node_id))
                     continue 
                 site_auth = get_authority(site_hrn)
                 site_name = site['login_base']
@@ -235,45 +261,61 @@ class PlImporter:
                                                pointer =node['node_id'],
                                                authority=get_authority(node_hrn))
                         node_record.just_created()
-                        dbsession.add(node_record)
-                        dbsession.commit()
-                        self.logger.info("PlImporter: imported node: %s" % node_record)  
+                        global_dbsession.add(node_record)
+                        global_dbsession.commit()
+                        self.logger.info("PlImporter: imported node: {}".format(node_record))
                         self.remember_record (node_record)
                     except:
-                        self.logger.log_exc("PlImporter: failed to import node") 
+                        self.logger.log_exc("PlImporter: failed to import node {}".format(node_hrn))
+                        continue
                 else:
                     # xxx update the record ...
                     pass
-                node_record.stale=False
+                node_record.stale = False
 
-            site_pis=[]
+            site_pis = []
             # import persons
             for person_id in site['person_ids']:
-                try:
+                proceed = False
+                if person_id in persons_by_id:
                     person = persons_by_id[person_id]
-                except:
-                    self.logger.warning ("PlImporter: cannot locate person_id %s - ignored"%person_id)
-                person_hrn = email_to_hrn(site_hrn, person['email'])
+                    proceed = True
+                elif person_id in disabled_person_ids:
+                    pass
+                else:
+                    self.logger.warning ("PlImporter: cannot locate person_id {} in site {} - ignored"\
+                                         .format(person_id, site_hrn))
+                # make sure to NOT run this if anything is wrong
+                if not proceed: continue
+
+                #person_hrn = email_to_hrn(site_hrn, person['email'])
+                person_hrn = person['hrn']
+                if person_hrn is None:
+                    self.logger.warn("Person {} has no hrn - skipped".format(person['email']))
+                    continue
                 # xxx suspicious again
-                if len(person_hrn) > 64: person_hrn = person_hrn[:64]
+                if len(person_hrn) > 64:
+                    person_hrn = person_hrn[:64]
                 person_urn = hrn_to_urn(person_hrn, 'user')
 
                 user_record = self.locate_by_type_hrn ( 'user', person_hrn)
 
                 # return a tuple pubkey (a plc key object) and pkey (a Keypair object)
                 def init_person_key (person, plc_keys):
-                    pubkey=None
+                    pubkey = None
                     if  person['key_ids']:
                         # randomly pick first key in set
                         pubkey = plc_keys[0]
                         try:
                             pkey = convert_public_key(pubkey['key'])
                         except:
-                            self.logger.warn('PlImporter: unable to convert public key for %s' % person_hrn)
+                            self.logger.warn('PlImporter: unable to convert public key for {}'
+                                             .format(person_hrn))
                             pkey = Keypair(create=True)
                     else:
                         # the user has no keys. Creating a random keypair for the user's gid
-                        self.logger.warn("PlImporter: person %s does not have a PL public key"%person_hrn)
+                        self.logger.warn("PlImporter: person {} does not have a PL public key"
+                                         .format(person_hrn))
                         pkey = Keypair(create=True)
                     return (pubkey, pkey)
 
@@ -281,65 +323,107 @@ class PlImporter:
                 try:
                     plc_keys = keys_by_person_id.get(person['person_id'],[])
                     if not user_record:
-                        (pubkey,pkey) = init_person_key (person, plc_keys )
-                        person_gid = self.auth_hierarchy.create_gid(person_urn, create_uuid(), pkey)
-                        person_gid.set_email(person['email'])
+                        (pubkey, pkey) = init_person_key (person, plc_keys )
+                        person_gid = self.auth_hierarchy.create_gid(person_urn, create_uuid(), pkey,
+                                                                    email=person['email'])
                         user_record = RegUser (hrn=person_hrn, gid=person_gid, 
-                                                 pointer=person['person_id'], 
-                                                 authority=get_authority(person_hrn),
-                                                 email=person['email'])
+                                               pointer=person['person_id'], 
+                                               authority=get_authority(person_hrn),
+                                               email=person['email'])
                         if pubkey: 
                             user_record.reg_keys=[RegKey (pubkey['key'], pubkey['key_id'])]
                         else:
-                            self.logger.warning("No key found for user %s"%user_record)
+                            self.logger.warning("No key found for user {}".format(user_record))
                         user_record.just_created()
-                        dbsession.add (user_record)
-                        dbsession.commit()
-                        self.logger.info("PlImporter: imported person: %s" % user_record)
+                        global_dbsession.add (user_record)
+                        global_dbsession.commit()
+                        self.logger.info("PlImporter: imported person: {}".format(user_record))
                         self.remember_record ( user_record )
                     else:
                         # update the record ?
-                        # if user's primary key has changed then we need to update the 
+                        #
+                        # if a user key has changed then we need to update the
                         # users gid by forcing an update here
+                        #
+                        # right now, SFA only has *one* key attached to a user, and this is
+                        # the key that the GID was made with
+                        # so the logic here is, we consider that things are OK (unchanged) if
+                        # all the SFA keys are present as PLC keys
+                        # otherwise we trigger the creation of a new gid from *some* plc key
+                        # and record this on the SFA side
+                        # it would make sense to add a feature in PLC so that one could pick a 'primary'
+                        # key but this is not available on the myplc side for now
+                        # = or = it would be much better to support several keys in SFA but that
+                        # does not seem doable without a major overhaul in the data model as
+                        # a GID is attached to a hrn, but it's also linked to a key, so...
+                        # NOTE: with this logic, the first key entered in PLC remains the one
+                        # current in SFA until it is removed from PLC
                         sfa_keys = user_record.reg_keys
-                        def key_in_list (key,sfa_keys):
-                            for reg_key in sfa_keys:
-                                if reg_key.key==key['key']: return True
+                        def sfa_key_in_list (sfa_key,plc_keys):
+                            for plc_key in plc_keys:
+                                if plc_key['key'] == sfa_key.key:
+                                    return True
                             return False
-                        # is there a new key in myplc ?
-                        new_keys=False
-                        for key in plc_keys:
-                            if not key_in_list (key,sfa_keys):
-                                new_keys = True
+                        # are all the SFA keys known to PLC ?
+                        new_keys = False
+                        if not sfa_keys and plc_keys:
+                            new_keys = True
+                        else: 
+                            for sfa_key in sfa_keys:
+                                 if not sfa_key_in_list (sfa_key,plc_keys):
+                                     new_keys = True
                         if new_keys:
                             (pubkey,pkey) = init_person_key (person, plc_keys)
                             person_gid = self.auth_hierarchy.create_gid(person_urn, create_uuid(), pkey)
+                            person_gid.set_email(person['email'])
                             if not pubkey:
-                                user_record.reg_keys=[]
+                                user_record.reg_keys = []
                             else:
-                                user_record.reg_keys=[ RegKey (pubkey['key'], pubkey['key_id'])]
-                            self.logger.info("PlImporter: updated person: %s" % user_record)
+                                user_record.reg_keys = [ RegKey (pubkey['key'], pubkey['key_id'])]
+                            user_record.gid = person_gid
+                            user_record.just_updated()
+                            self.logger.info("PlImporter: updated person: {}".format(user_record))
                     user_record.email = person['email']
-                    dbsession.commit()
-                    user_record.stale=False
+                    global_dbsession.commit()
+                    user_record.stale = False
                     # accumulate PIs - PLCAPI has a limitation that when someone has PI role
                     # this is valid for all sites she is in..
-                    # PI is coded with role_id==20
+                    # PI is coded with role_id == 20
                     if 20 in person['role_ids']:
                         site_pis.append (user_record)
+
+                    # PL Admins need to marked as PI of the top authority record
+                    if 10 in person['role_ids'] and user_record not in top_auth_record.reg_pis:
+                        admins.append(user_record)
+
                 except:
-                    self.logger.log_exc("PlImporter: failed to import person %d %s"%(person['person_id'],person['email']))
+                    self.logger.log_exc("PlImporter: failed to import person {} {}"
+                                        .format(person['person_id'], person['email']))
     
             # maintain the list of PIs for a given site
-            site_record.reg_pis = site_pis
+            # for the record, Jordan had proposed the following addition as a welcome hotfix to a previous version:
+            # site_pis = list(set(site_pis)) 
+            # this was likely due to a bug in the above logic, that had to do with disabled persons
+            # being improperly handled, and where the whole loop on persons
+            # could be performed twice with the same person...
+            # so hopefully we do not need to eliminate duplicates explicitly here anymore
+            site_record.reg_pis = list(set(site_pis))
+            global_dbsession.commit()
 
             # import slices
             for slice_id in site['slice_ids']:
                 try:
                     slice = slices_by_id[slice_id]
                 except:
-                    self.logger.warning ("PlImporter: cannot locate slice_id %s - ignored"%slice_id)
-                slice_hrn = slicename_to_hrn(interface_hrn, slice['name'])
+                    self.logger.warning ("PlImporter: cannot locate slice_id {} - ignored"
+                                         .format(slice_id))
+                    continue
+                #slice_hrn = slicename_to_hrn(interface_hrn, slice['name'])
+                slice_hrn = slice['hrn']
+                if slice_hrn is None:
+                    self.logger.warning("Slice {} has no hrn - skipped"
+                                        .format(slice['name']))
+                    continue
                 slice_record = self.locate_by_type_hrn ('slice', slice_hrn)
                 if not slice_record:
                     try:
@@ -350,40 +434,50 @@ class PlImporter:
                                                  pointer=slice['slice_id'],
                                                  authority=get_authority(slice_hrn))
                         slice_record.just_created()
-                        dbsession.add(slice_record)
-                        dbsession.commit()
-                        self.logger.info("PlImporter: imported slice: %s" % slice_record)  
+                        global_dbsession.add(slice_record)
+                        global_dbsession.commit()
+                        self.logger.info("PlImporter: imported slice: {}".format(slice_record))
                         self.remember_record ( slice_record )
                     except:
-                        self.logger.log_exc("PlImporter: failed to import slice")
+                        self.logger.log_exc("PlImporter: failed to import slice {} ({})"
+                                            .format(slice_hrn, slice['name']))
                 else:
                     # xxx update the record ...
-                    self.logger.warning ("Slice update not yet implemented")
+                    # given that we record the current set of users anyways, there does not seem to be much left to do here
+                    # self.logger.warning ("Slice update not yet implemented on slice {} ({})"
+                    #                      .format(slice_hrn, slice['name']))
                     pass
                 # record current users affiliated with the slice
                 slice_record.reg_researchers = \
                     [ self.locate_by_type_pointer ('user',user_id) for user_id in slice['person_ids'] ]
-                dbsession.commit()
-                slice_record.stale=False
+                global_dbsession.commit()
+                slice_record.stale = False
+
+        # Set PL Admins as PI's of the top authority
+        if admins:
+            top_auth_record.reg_pis = list(set(admins))
+            global_dbsession.commit()
+            self.logger.info('PlImporter: set PL admins {} as PIs of {}'
+                             .format(admins, top_auth_record.hrn))
 
         ### remove stale records
         # special records must be preserved
         system_hrns = [interface_hrn, root_auth, interface_hrn + '.slicemanager']
         for record in all_records: 
             if record.hrn in system_hrns: 
-                record.stale=False
+                record.stale = False
             if record.peer_authority:
-                record.stale=False
+                record.stale = False
             if ".vini" in interface_hrn and interface_hrn.endswith('vini') and \
                 record.hrn.endswith("internet2"):
-                record.stale=False
+                record.stale = False
 
         for record in all_records:
-            try:        stale=record.stale
+            try:        stale = record.stale
             except:     
-                stale=True
-                self.logger.warning("stale not found with %s"%record)
+                stale = True
+                self.logger.warning("stale not found with {}".format(record))
             if stale:
-                self.logger.info("PlImporter: deleting stale record: %s" % record)
-                dbsession.delete(record)
-                dbsession.commit()
+                self.logger.info("PlImporter: deleting stale record: {}".format(record))
+                global_dbsession.delete(record)
+                global_dbsession.commit()