From 2a56dbb661438fd16be2b574897f7c3e92ab05e9 Mon Sep 17 00:00:00 2001
From: Thierry Parmentelat
Date: Tue, 5 Dec 2006 16:45:03 +0000
Subject: [PATCH] Cache:
 (*) sync only when needed - drastic improvement on peering refresh when already up to date
 (*) reverted Tony's change in Cache
 (*) returns a lot of timing info in the 'timers' key
 Other classes:
 (*) removed the refreshing of timestamps - they are read-only and broke the sync optimization
---
 PLC/Cache.py   | 86 +++++++++++++++++++++++++++++++++++++++-----------
 PLC/Nodes.py   |  7 ++--
 PLC/Persons.py |  7 ++--
 PLC/Sites.py   |  5 ++-
 PLC/Slices.py  |  5 ++-
 5 files changed, 85 insertions(+), 25 deletions(-)

diff --git a/PLC/Cache.py b/PLC/Cache.py
index e949599..d6d449d 100644
--- a/PLC/Cache.py
+++ b/PLC/Cache.py
@@ -92,12 +92,22 @@ class Cache:
         self.api.db.do (sql)
 
     def insert_new_items (self, id1, id2_set):
-        if id2_set:
-            sql = "INSERT INTO %s select %d, %d " % \
-                self.tablename, id1, id2[0]
-            for id2 in id2_set[1:]:
-                sql += " UNION ALL SELECT %d, %d " % \
-                    (id1,id2)
-        self.api.db.do (sql)
+        ### xxx needs to be optimized
+        ### tried to figure out a way to use a single sql statement
+        ### like: insert into table (x,y) values (1,2),(3,4);
+        ### but apparently this is not supported under postgresql
+        for id2 in id2_set:
+            sql = "INSERT INTO %s (%s_id,%s_id) VALUES (%d,%d)" % \
+                  (self.tablename,self.lowerclass1,self.lowerclass2,id1,id2)
+            self.api.db.do (sql)
+
+# below is Tony's code but it's badly broken - I'm not sure we care, in fact
+#        if id2_set:
+#            sql = "INSERT INTO %s select %d, %d " % \
+#                self.tablename, id1, id2[0]
+#            for id2 in id2_set[1:]:
+#                sql += " UNION ALL SELECT %d, %d " % \
+#                    (id1,id2)
 
     def update_item (self, id1, old_id2s, new_id2s):
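For the record, the single-statement form the comment above wishes for - INSERT ... VALUES (1,2),(3,4) - only became available with PostgreSQL 8.2, released in December 2006, which is why the per-row loop was the portable choice at the time. A minimal sketch of both variants, assuming a PLCAPI-style db.do(sql) executor; the function names and the generic table/column parameters are illustrative, not part of the patch:

    def insert_pairs (db, table, col1, col2, id1, id2_set):
        """One INSERT per row - the portable form used by insert_new_items."""
        for id2 in id2_set:
            db.do ("INSERT INTO %s (%s,%s) VALUES (%d,%d)" % \
                   (table, col1, col2, id1, id2))

    def insert_pairs_multirow (db, table, col1, col2, id1, id2_set):
        """Single-statement form with a multi-row VALUES list (PostgreSQL >= 8.2)."""
        if not id2_set:
            return
        values = ",".join(["(%d,%d)" % (id1, id2) for id2 in id2_set])
        db.do ("INSERT INTO %s (%s,%s) VALUES %s" % (table, col1, col2, values))

The multi-row form saves one round-trip per pair, at the cost of requiring 8.2 or later.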
@@ -152,8 +162,6 @@ class Cache:
 
         ### index upon class_key for future searches
         local_objects_index = local_objects.dict(class_key)
 
-        #verbose ('update_table',classname,local_objects_index.keys())
-
         ### mark entries for this peer outofdate
         new_count=0
         old_count=0;
@@ -176,6 +184,9 @@ class Cache:
 
             verbose ('update_table (%s) - Considering'%classname,object_name)
 
+            # optimizing : avoid doing useless syncs
+            needs_sync = False
+
             # create or update
             try:
                 ### We know about this object already
@@ -195,9 +206,12 @@ class Cache:
                 ### we can assume the object just moved
                 ### needs to update peer_id though
                 local_object['peer_id'] = peer_id
+                needs_sync = True
                 # update all fields as per foreign_fields
                 for field in foreign_fields:
-                    local_object[field]=alien_object[field]
+                    if local_object[field] != alien_object[field]:
+                        local_object[field]=alien_object[field]
+                        needs_sync = True
                 verbose ('update_table FOUND',object_name)
             except:
                 ### create a new entry
@@ -219,7 +233,7 @@ class Cache:
             direct_ref_fields=[]
             for xref in foreign_xrefs:
                 field=xref['field']
-                verbose('checking field %s for direct_ref'%field)
+                #verbose('checking field %s for direct_ref'%field)
                 if isinstance(alien_object[field],int):
                     direct_ref_fields.append(field)
             verbose("FOUND DIRECT REFS",direct_ref_fields)
@@ -227,6 +241,7 @@ class Cache:
                 local_object[field]=1
             verbose('Early sync on',local_object)
             local_object.sync()
+            needs_sync = False
 
             # this row is now valid
             local_object.uptodate=True
@@ -260,13 +275,15 @@ class Cache:
                 elif isinstance (alien_value,int):
                     #verbose ('update_table atom-transcoding ',xref['class'],' aliens=',alien_value,)
                     new_value = transcoder.transcode(alien_value)
-                    local_object[field] = new_value
+                    if local_object[field] != new_value:
+                        local_object[field] = new_value
+                        needs_sync = True
 
             ### this object is completely updated, let's save it
-            verbose('FINAL sync on %s:'%object_name,local_object)
-            local_object.sync()
+            if needs_sync:
+                verbose('FINAL sync on %s:'%object_name,local_object)
+                local_object.sync(False)
 
-
         ### delete entries that are not uptodate
         for local_object in local_objects:
             if not local_object.uptodate:
@@ -365,7 +382,7 @@ class Cache:
                 verbose('CREATED new sa')
             local_object.uptodate=True
             new_count += 1
-            local_object.sync()
+            local_object.sync(False)
 
         for local_object in local_objects:
             if not local_object.uptodate:
@@ -385,6 +402,7 @@ class Cache:
 
         ### uses GetPeerData to gather all info in a single xmlrpc request
 
+        timers={}
         t_start=time.time()
         # xxx see also GetPeerData - peer_id arg unused yet
         all_data = self.peer_server.GetPeerData (self.auth,0)
@@ -395,29 +413,49 @@ class Cache:
         all_sites = plocal_sites + all_data['Sites-peer']
         nb_new_sites = self.update_table('Site', plocal_sites)
 
+        t0 = time.time()
+        timers['process-sites']=t0-t_acquired
+
         # refresh keys
         plocal_keys = all_data['Keys-local']
         all_keys = plocal_keys + all_data['Keys-peer']
         nb_new_keys = self.update_table('Key', plocal_keys)
 
+        t=time.time()
+        timers['process-keys']=t-t0
+        t0=t
+
         # refresh nodes
         plocal_nodes = all_data['Nodes-local']
         all_nodes = plocal_nodes + all_data['Nodes-peer']
         nb_new_nodes = self.update_table('Node', plocal_nodes,
                                          { 'Site' : all_sites } )
 
+        t=time.time()
+        timers['process-nodes']=t-t0
+        t0=t
+
         # refresh persons
         plocal_persons = all_data['Persons-local']
         all_persons = plocal_persons + all_data['Persons-peer']
         nb_new_persons = self.update_table ('Person', plocal_persons,
                                             { 'Key': all_keys, 'Site' : all_sites } )
 
+        t=time.time()
+        timers['process-persons']=t-t0
+        t0=t
+
         # refresh slice attribute types
         plocal_slice_attribute_types = all_data ['SliceAttibuteTypes-local']
         nb_new_slice_attribute_types = self.update_table ('SliceAttributeType',
                                                           plocal_slice_attribute_types,
                                                           report_name_conflicts = False)
 
+        t=time.time()
+        timers['process-sat']=t-t0
+        t0=t
+
         # refresh slices
         plocal_slices = all_data['Slices-local']
         all_slices = plocal_slices + all_data['Slices-peer']
@@ -431,13 +469,26 @@ class Cache:
                                          'Site': all_sites},
                                          is_system_slice)
 
+        t=time.time()
+        timers['process-slices']=t-t0
+        t0=t
+
         # refresh slice attributes
         plocal_slice_attributes = all_data ['SliceAttributes-local']
         nb_new_slice_attributes = self.update_slice_attributes (plocal_slice_attributes,
                                                                 all_nodes,
                                                                 all_slices)
+        t=time.time()
+        timers['process-sa']=t-t0
+        t0=t
 
         t_end=time.time()
+
+        timers['time_gather'] = all_data['ellapsed']
+        timers['time_transmit'] = t_acquired-t_start-all_data['ellapsed']
+        timers['time_process'] = t_end-t_acquired
+        timers['time_all'] = t_end-t_start
+
         ### returned as-is by RefreshPeer
         return {'plcname':self.api.config.PLC_NAME,
                 'new_sites':nb_new_sites,
@@ -447,9 +498,6 @@ class Cache:
                 'new_slice_attribute_types':nb_new_slice_attribute_types,
                 'new_slices':nb_new_slices,
                 'new_slice_attributes':nb_new_slice_attributes,
-                'time_gather': all_data['ellapsed'],
-                'time_transmit':t_acquired-t_start-all_data['ellapsed'],
-                'time_process':t_end-t_acquired,
-                'time_all':t_end-t_start,
+                'timers':timers,
                 }
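The timing code added to refresh_peer follows a simple checkpoint pattern: stamp the clock after each phase, store the delta under a per-phase key, then derive the gather/transmit/process totals at the end. The same idea as a standalone sketch, using only the standard library; the Timers class is illustrative, the patch inlines the t/t0 bookkeeping instead:

    import time

    class Timers (dict):
        """A dict of named wall-clock intervals, filled checkpoint-style."""
        def __init__ (self):
            dict.__init__(self)
            self.t0 = time.time()
        def checkpoint (self, label):
            # record the time elapsed since the previous checkpoint
            t = time.time()
            self[label] = t - self.t0
            self.t0 = t

    timers = Timers()
    # ... refresh sites ...
    timers.checkpoint('process-sites')
    # ... refresh keys ...
    timers.checkpoint('process-keys')
    # timers now maps each phase name to its duration in seconds

Returning the whole dict under a single 'timers' key, as the patch does, lets RefreshPeer grow new measurements later without changing its result signature again.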
diff --git a/PLC/Nodes.py b/PLC/Nodes.py
index 4c1c695..769a9da 100644
--- a/PLC/Nodes.py
+++ b/PLC/Nodes.py
@@ -4,7 +4,7 @@
 # Mark Huang
 # Copyright (C) 2006 The Trustees of Princeton University
 #
-# $Id: Nodes.py,v 1.26 2006/11/28 14:55:00 thierry Exp $
+# $Id: Nodes.py,v 1.27 2006/11/30 10:12:01 thierry Exp $
 #
 
 from types import StringTypes
@@ -64,7 +64,10 @@ class Node(Row):
 
     # for Cache
     class_key = 'hostname'
-    foreign_fields = ['boot_state','model','version','date_created','last_updated']
+    foreign_fields = ['boot_state','model','version']
+    # forget about these ones, they are read-only anyway
+    # handling them causes Cache to re-sync all over again
+    # 'date_created','last_updated'
     foreign_xrefs = [
         # in this case, we dont need the 'table' but Cache will look it up, so...
         {'field' : 'site_id' , 'class' : 'Site' , 'table' : 'unused-on-direct-refs' } ,
diff --git a/PLC/Persons.py b/PLC/Persons.py
index fd5645c..2f77d90 100644
--- a/PLC/Persons.py
+++ b/PLC/Persons.py
@@ -4,7 +4,7 @@
 # Mark Huang
 # Copyright (C) 2006 The Trustees of Princeton University
 #
-# $Id: Persons.py,v 1.22 2006/11/28 14:55:00 thierry Exp $
+# $Id: Persons.py,v 1.23 2006/11/30 10:12:01 thierry Exp $
 #
 
 from types import StringTypes
@@ -56,7 +56,10 @@ class Person(Row):
     # for Cache
     class_key = 'email'
     foreign_fields = ['first_name', 'last_name', 'title', 'email', 'phone', 'url',
-                      'bio', 'enabled', 'password', 'last_updated', 'date_created']
+                      'bio', 'enabled', 'password', ]
+    # forget about these ones, they are read-only anyway
+    # handling them causes Cache to re-sync all over again
+    # 'last_updated', 'date_created'
     foreign_xrefs = [
         {'field' : 'key_ids',  'class': 'Key',  'table' : 'person_key' } ,
         {'field' : 'site_ids', 'class': 'Site', 'table' : 'person_site'},
diff --git a/PLC/Sites.py b/PLC/Sites.py
index a30c49b..1db5b13 100644
--- a/PLC/Sites.py
+++ b/PLC/Sites.py
@@ -46,8 +46,11 @@ class Site(Row):
     # for Cache
     class_key = 'login_base'
     foreign_fields = ['abbreviated_name', 'name', 'is_public', 'latitude', 'longitude',
-                      'url', 'date_created', 'last_updated', 'max_slices', 'max_slivers',
+                      'url', 'max_slices', 'max_slivers',
                       ]
+    # forget about these ones, they are read-only anyway
+    # handling them causes Cache to re-sync all over again
+    # 'last_updated', 'date_created'
     foreign_xrefs = []
 
     def validate_name(self, name):
diff --git a/PLC/Slices.py b/PLC/Slices.py
index 2add8ac..5e2b2e2 100644
--- a/PLC/Slices.py
+++ b/PLC/Slices.py
@@ -39,13 +39,16 @@ class Slice(Row):
         }
     # for Cache
     class_key = 'name'
-    foreign_fields = ['instantiation', 'url', 'description', 'max_nodes', 'created', 'expires']
+    foreign_fields = ['instantiation', 'url', 'description', 'max_nodes', 'expires']
     foreign_xrefs = [
         {'field': 'node_ids' ,         'class': 'Node',   'table': 'slice_node' },
         {'field': 'person_ids',        'class': 'Person', 'table': 'slice_person'},
         {'field': 'creator_person_id', 'class': 'Person', 'table': 'unused-on-direct-refs'},
         {'field': 'site_id',           'class': 'Site',   'table': 'unused-on-direct-refs'},
         ]
+    # forget about this one, it is read-only anyway
+    # handling it causes Cache to re-sync all over again
+    # 'created'
 
     def validate_name(self, name):
         # N.B.: Responsibility of the caller to ensure that login_base
-- 
2.43.0
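The needs_sync flag threaded through update_table is a classic dirty flag: compare before assigning, and only pay for a database write when at least one field actually changed. That is what makes a refresh against an already up-to-date peer so much cheaper, and it is also why the timestamp fields are dropped from foreign_fields above - per the comments, handling them caused Cache to re-sync all over again. The core of the idea as a standalone sketch; update_fields and the dict-style rows are illustrative, not the actual PLCAPI Row classes:

    def update_fields (local, alien, fields):
        """Copy alien values into local; return True iff something changed."""
        dirty = False
        for field in fields:
            if local[field] != alien[field]:
                local[field] = alien[field]
                dirty = True
        return dirty

    local_object = {'boot_state':'boot', 'model':'custom', 'version':'1.0'}
    alien_object = {'boot_state':'boot', 'model':'stock',  'version':'1.0'}
    # only hit the database when a field really differs
    if update_fields (local_object, alien_object, ['boot_state','model','version']):
        print 'sync needed'   # reached here because 'model' differs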