sfa/storage/model.py

   1 from types import StringTypes
   2 from datetime import datetime
   3
   4 from sqlalchemy import Column, Integer, String, DateTime
   5 from sqlalchemy import Table, Column, MetaData, join, ForeignKey
   6 from sqlalchemy.orm import relationship, backref
   7 from sqlalchemy.orm import column_property
   8 from sqlalchemy.orm import object_mapper
   9 from sqlalchemy.orm import validates
  10 from sqlalchemy.ext.declarative import declarative_base
  11
  12 from sfa.util.sfalogging import logger
  13 from sfa.util.sfatime import utcparse, datetime_to_string
  14 from sfa.util.xml import XML
  15
  16 from sfa.trust.gid import GID
  17
  18 ##############################
  19 Base=declarative_base()
  20
  21 ####################
  22 # dicts vs objects
  23 ####################
  24 # historically the front end to the db dealt with dicts, so the code was only dealing with dicts
  25 # sqlalchemy however offers an object interface, meaning that you write obj.id instead of obj['id']
  26 # which is admittedly much nicer
  27 # however we still need to deal with dictionaries if only for the xmlrpc layer
  28 #
  29 # here are a few utilities for this
  30 #
# (*) first off, when an old piece of code needs to be used as-is, if only temporarily, the simplest trick
# is to use obj.__dict__
# this behaves exactly as required, i.e. obj.__dict__['field']='new value' does change obj.field
# however this depends on sqlalchemy's implementation so it should be avoided
  35 #
  36 # (*) second, when an object needs to be exposed to the xmlrpc layer, we need to convert it into a dict
  37 # remember though that writing the resulting dictionary won't change the object
  38 # essentially obj.__dict__ would be fine too, except that we want to discard alchemy private keys starting with '_'
  39 # 2 ways are provided for that:
  40 # . dict(obj)
  41 # . obj.todict()
# the former dict(obj) relies on __iter__() and next() below, and does not rely on the field names
# although it seems to work fine, I've found cases where it issues a weird python error that I could not get right
# so the latter obj.todict() seems more reliable but more hacky as it relies on the form of fields, so this can probably be improved
  45 #
  46 # (*) finally for converting a dictionary into an sqlalchemy object, we provide
  47 # obj.load_from_dict(dict)
  48
  49 class AlchemyObj:
  50     def __iter__(self):
  51         self._i = iter(object_mapper(self).columns)
  52         return self
  53     def next(self):
  54         n = self._i.next().name
  55         return n, getattr(self, n)
  56     def todict (self):
  57         d=self.__dict__
  58         keys=[k for k in d.keys() if not k.startswith('_')]
  59         return dict ( [ (k,d[k]) for k in keys ] )
  60     def load_from_dict (self, d):
  61         for (k,v) in d.iteritems():
  62             # experimental
  63             if isinstance(v, StringTypes) and v.lower() in ['true']: v=True
  64             if isinstance(v, StringTypes) and v.lower() in ['false']: v=False
  65             setattr(self,k,v)
  66
  67     def validate_datetime (self, key, incoming):
  68         if isinstance (incoming, datetime):     return incoming
  69         elif isinstance (incoming, (int,float)):return datetime.fromtimestamp (incoming)
  70
  71     # in addition we provide convenience for converting to and from xml records
  72     # for this purpose only, we need the subclasses to define 'fields' as either
  73     # a list or a dictionary
  74     def xml_fields (self):
  75         fields=self.fields
  76         if isinstance(fields,dict): fields=fields.keys()
  77         return fields
  78
  79     def save_as_xml (self):
  80         # xxx not sure about the scope here
  81         input_dict = dict( [ (key, getattr(self.key), ) for key in self.xml_fields() if getattr(self,key,None) ] )
  82         xml_record=XML("<record />")
  83         xml_record.parse_dict (input_dict)
  84         return xml_record.toxml()
  85
  86     def dump(self, format=None, dump_parents=False):
  87         if not format:
  88             format = 'text'
  89         else:
  90             format = format.lower()
  91         if format == 'text':
  92             self.dump_text(dump_parents)
  93         elif format == 'xml':
  94             print self.save_to_string()
  95         elif format == 'simple':
  96             print self.dump_simple()
  97         else:
  98             raise Exception, "Invalid format %s" % format
  99
 100     # xxx fixme
 101     # turns out the date_created field is received by the client as a 'created' int
 102     # (and 'last_updated' does not make it at all)
 103     # let's be flexible
 104     def date_repr (self,fields):
 105         if not isinstance(fields,list): fields=[fields]
 106         for field in fields:
 107             value=getattr(self,field,None)
 108             if isinstance (value,datetime):
 109                 return datetime_to_string (value)
 110             elif isinstance (value,(int,float)):
 111                 return datetime_to_string(utcparse(value))
 112         # fallback
 113         return "** undef_datetime **"
 114
 115     def dump_text(self, dump_parents=False):
 116         # print core fields in this order
 117         core_fields = [ 'hrn', 'type', 'authority', 'date_created', 'created', 'last_updated', 'gid',  ]
 118         print "".join(['=' for i in range(40)])
 119         print "RECORD"
 120         print "    hrn:", self.hrn
 121         print "    type:", self.type
 122         print "    authority:", self.authority
 123         print "    date created:", self.date_repr( ['date_created','created'] )
 124         print "    last updated:", self.date_repr('last_updated')
 125         print "    gid:"
 126         print self.get_gid_object().dump_string(8, dump_parents)
 127
 128         # print remaining fields
 129         for attrib_name in dir(self):
 130             attrib = getattr(self, attrib_name)
 131             # skip internals
 132             if attrib_name.startswith('_'):     continue
 133             # skip core fields
 134             if attrib_name in core_fields:      continue
 135             # skip callables
 136             if callable (attrib):               continue
 137             print "     %s: %s" % (attrib_name, attrib)
 138
 139     def dump_simple(self):
 140         return "%s"%self
 141
 142 #    # only intended for debugging
 143 #    def inspect (self, logger, message=""):
 144 #        logger.info("%s -- Inspecting AlchemyObj -- attrs"%message)
 145 #        for k in dir(self):
 146 #            if not k.startswith('_'):
 147 #                logger.info ("  %s: %s"%(k,getattr(self,k)))
 148 #        logger.info("%s -- Inspecting AlchemyObj -- __dict__"%message)
 149 #        d=self.__dict__
 150 #        for (k,v) in d.iteritems():
 151 #            logger.info("[%s]=%s"%(k,v))
 152
 153
 154 ##############################
# various kinds of records are implemented as an inheritance hierarchy
# RegRecord is the base class for all actual variants
# a first draft was using 'type' as the discriminator for the inheritance
# but we had to define another more internal column (classtype) so we
# accommodate variants in types like authority+am and the like
 160
 161 class RegRecord (Base,AlchemyObj):
 162     __tablename__       = 'records'
 163     record_id           = Column (Integer, primary_key=True)
 164     # this is the discriminator that tells which class to use
 165     classtype           = Column (String)
 166     # in a first version type was the discriminator
 167     # but that could not accomodate for 'authority+sa' and the like
 168     type                = Column (String)
 169     hrn                 = Column (String)
 170     gid                 = Column (String)
 171     authority           = Column (String)
 172     peer_authority      = Column (String)
 173     pointer             = Column (Integer, default=-1)
 174     date_created        = Column (DateTime)
 175     last_updated        = Column (DateTime)
 176     # use the 'type' column to decide which subclass the object is of
 177     __mapper_args__     = { 'polymorphic_on' : classtype }
 178
 179     fields = [ 'type', 'hrn', 'gid', 'authority', 'peer_authority' ]
 180     def __init__ (self, type=None, hrn=None, gid=None, authority=None, peer_authority=None,
 181                   pointer=None, dict=None):
 182         if type:                                self.type=type
 183         if hrn:                                 self.hrn=hrn
 184         if gid:
 185             if isinstance(gid, StringTypes):    self.gid=gid
 186             else:                               self.gid=gid.save_to_string(save_parents=True)
 187         if authority:                           self.authority=authority
 188         if peer_authority:                      self.peer_authority=peer_authority
 189         if pointer:                             self.pointer=pointer
 190         if dict:                                self.load_from_dict (dict)
 191
 192     def __repr__(self):
 193         result="<Record id=%s, type=%s, hrn=%s, authority=%s, pointer=%s" % \
 194                 (self.record_id, self.type, self.hrn, self.authority, self.pointer)
 195         # skip the uniform '--- BEGIN CERTIFICATE --' stuff
 196         if self.gid: result+=" gid=%s..."%self.gid[28:36]
 197         else: result+=" nogid"
 198         result += ">"
 199         return result
 200
 201     @validates ('gid')
 202     def validate_gid (self, key, gid):
 203         if gid is None:                     return
 204         elif isinstance(gid, StringTypes):  return gid
 205         else:                               return gid.save_to_string(save_parents=True)
 206
 207     @validates ('date_created')
 208     def validate_date_created (self, key, incoming): return self.validate_datetime (key, incoming)
 209
 210     @validates ('last_updated')
 211     def validate_last_updated (self, key, incoming): return self.validate_datetime (key, incoming)
 212
 213     # xxx - there might be smarter ways to handle get/set'ing gid using validation hooks
 214     def get_gid_object (self):
 215         if not self.gid: return None
 216         else: return GID(string=self.gid)
 217
 218     def just_created (self):
 219         now=datetime.now()
 220         self.date_created=now
 221         self.last_updated=now
 222
 223     def just_updated (self):
 224         now=datetime.now()
 225         self.last_updated=now
 226
 227 ##############################
 228 # all subclasses define a convenience constructor with a default value for type,
 229 # and when applicable a way to define local fields in a kwd=value argument
 230 ####################
 231 class RegAuthority (RegRecord):
 232     __tablename__       = 'authorities'
 233     __mapper_args__     = { 'polymorphic_identity' : 'authority' }
 234     record_id           = Column (Integer, ForeignKey ("records.record_id"), primary_key=True)
 235
 236     def __init__ (self, **kwds):
 237         # fill in type if not previously set
 238         if 'type' not in kwds: kwds['type']='authority'
 239         # base class constructor
 240         RegRecord.__init__(self, **kwds)
 241
 242     # no proper data yet, just hack the typename
 243     def __repr__ (self):
 244         return RegRecord.__repr__(self).replace("Record","Authority")
 245
 246 ####################
 247 # slice x user (researchers) association
 248 slice_researcher_table = \
 249     Table ( 'slice_researcher', Base.metadata,
 250             Column ('slice_id', Integer, ForeignKey ('records.record_id'), primary_key=True),
 251             Column ('researcher_id', Integer, ForeignKey ('records.record_id'), primary_key=True),
 252             )
 253
 254 ####################
 255 class RegSlice (RegRecord):
 256     __tablename__       = 'slices'
 257     __mapper_args__     = { 'polymorphic_identity' : 'slice' }
 258     record_id           = Column (Integer, ForeignKey ("records.record_id"), primary_key=True)
 259     #### extensions come here
 260     reg_researchers     = relationship \
 261         ('RegUser',
 262          secondary=slice_researcher_table,
 263          primaryjoin=RegRecord.record_id==slice_researcher_table.c.slice_id,
 264          secondaryjoin=RegRecord.record_id==slice_researcher_table.c.researcher_id,
 265          backref="reg_slices_as_researcher")
 266
 267     def __init__ (self, **kwds):
 268         if 'type' not in kwds: kwds['type']='slice'
 269         RegRecord.__init__(self, **kwds)
 270
 271     def __repr__ (self):
 272         return RegRecord.__repr__(self).replace("Record","Slice")
 273
 274 ####################
 275 class RegNode (RegRecord):
 276     __tablename__       = 'nodes'
 277     __mapper_args__     = { 'polymorphic_identity' : 'node' }
 278     record_id           = Column (Integer, ForeignKey ("records.record_id"), primary_key=True)
 279
 280     def __init__ (self, **kwds):
 281         if 'type' not in kwds: kwds['type']='node'
 282         RegRecord.__init__(self, **kwds)
 283
 284     def __repr__ (self):
 285         return RegRecord.__repr__(self).replace("Record","Node")
 286
 287 ####################
 288 class RegUser (RegRecord):
 289     __tablename__       = 'users'
 290     # these objects will have type='user' in the records table
 291     __mapper_args__     = { 'polymorphic_identity' : 'user' }
 292     record_id           = Column (Integer, ForeignKey ("records.record_id"), primary_key=True)
 293     #### extensions come here
 294     email               = Column ('email', String)
 295     # can't use name 'keys' here because when loading from xml we're getting
 296     # a 'keys' tag, and assigning a list of strings in a reference column like this crashes
 297     reg_keys            = relationship \
 298         ('RegKey', backref='reg_user',
 299          cascade="all, delete, delete-orphan")
 300
 301     # so we can use RegUser (email=.., hrn=..) and the like
 302     def __init__ (self, **kwds):
 303         # handle local settings
 304         if 'email' in kwds: self.email=kwds.pop('email')
 305         if 'type' not in kwds: kwds['type']='user'
 306         RegRecord.__init__(self, **kwds)
 307
 308     # append stuff at the end of the record __repr__
 309     def __repr__ (self):
 310         result = RegRecord.__repr__(self).replace("Record","User")
 311         result.replace (">"," email=%s"%self.email)
 312         result += ">"
 313         return result
 314
 315     @validates('email')
 316     def validate_email(self, key, address):
 317         assert '@' in address
 318         return address
 319
 320 ####################
 321 # xxx tocheck : not sure about eager loading of this one
 322 # meaning, when querying the whole records, we expect there should
 323 # be a single query to fetch all the keys
 324 # or, is it enough that we issue a single query to retrieve all the keys
 325 class RegKey (Base):
 326     __tablename__       = 'keys'
 327     key_id              = Column (Integer, primary_key=True)
 328     record_id             = Column (Integer, ForeignKey ("records.record_id"))
 329     key                 = Column (String)
 330     pointer             = Column (Integer, default = -1)
 331
 332     def __init__ (self, key, pointer=None):
 333         self.key=key
 334         if pointer: self.pointer=pointer
 335
 336     def __repr__ (self):
 337         result="<key id=%s key=%s..."%(self.key_id,self.key[8:16],)
 338         try:    result += " user=%s"%self.reg_user.record_id
 339         except: result += " no-user"
 340         result += ">"
 341         return result
 342
 343 ##############################
 344 # although the db needs of course to be reachable for the following functions
 345 # the schema management functions are here and not in alchemy
 346 # because the actual details of the classes need to be known
 347 # migrations: this code has no notion of the previous versions
 348 # of the data model nor of migrations
 349 # sfa.storage.migrations.db_init uses this when starting from
 350 # a fresh db only
 351 def init_tables(engine):
 352     logger.info("Initializing db schema from current/latest model")
 353     Base.metadata.create_all(engine)
 354
 355 def drop_tables(engine):
 356     logger.info("Dropping tables from current/latest model")
 357     Base.metadata.drop_all(engine)
 358
 359 ##############################
 360 # create a record of the right type from either a dict or an xml string
 361 def make_record (dict={}, xml=""):
 362     if dict:    return make_record_dict (dict)
 363     elif xml:   return make_record_xml (xml)
 364     else:       raise Exception("make_record has no input")
 365
 366 # convert an incoming record - typically from xmlrpc - into an object
 367 def make_record_dict (record_dict):
 368     assert ('type' in record_dict)
 369     type=record_dict['type'].split('+')[0]
 370     if type=='authority':
 371         result=RegAuthority (dict=record_dict)
 372     elif type=='user':
 373         result=RegUser (dict=record_dict)
 374     elif type=='slice':
 375         result=RegSlice (dict=record_dict)
 376     elif type=='node':
 377         result=RegNode (dict=record_dict)
 378     else:
 379         logger.debug("Untyped RegRecord instance")
 380         result=RegRecord (dict=record_dict)
 381     logger.info ("converting dict into Reg* with type=%s"%type)
 382     logger.info ("returning=%s"%result)
 383     # xxx todo
 384     # register non-db attributes in an extensions field
 385     return result
 386
 387 def make_record_xml (xml):
 388     xml_record = XML(xml)
 389     xml_dict = xml_record.todict()
 390     logger.info("load from xml, keys=%s"%xml_dict.keys())
 391     return make_record_dict (xml_dict)
 392