+### a very simple cache mechanism so that successive runs (see make)
+### will go *much* faster
+### assuming everything is sequential, as simple as it gets
+### { url -> (timestamp,version)}
class VersionCache:
    """A very simple on-disk cache of { url -> (timestamp, version) }.

    The whole mapping is pickled into a single file so that successive
    runs can reuse versions obtained earlier.  Everything is assumed to be
    sequential: there is no locking around the file or the mapping.

    Entries older than ``expires`` seconds are reported as missing by
    ``get`` (they are not removed from the mapping).
    """

    def __init__(self, filename=None, expires=60*60):
        """Create the cache and immediately try to load it from disk.

        filename -- path of the pickle file; when None, the file
                    "sfascan-version-cache.pickle" located in the same
                    directory as sys.argv[0] is used.
        expires  -- maximum age, in seconds, for an entry to be served.
        """
        # default is to store cache in the same dir as argv[0]
        if filename is None:
            filename = os.path.join(os.path.dirname(sys.argv[0]),
                                    "sfascan-version-cache.pickle")
        self.filename = filename
        self.expires = expires
        self.url2version = {}
        self.load()

    def load(self):
        """Read the mapping from self.filename; start empty on any failure.

        A missing, unreadable or corrupted file is not an error: the cache
        simply restarts from scratch.
        """
        # NOTE(review): unpickling runs code chosen by whoever wrote the
        # file; this is only safe as long as the cache file is trusted.
        try:
            # pickles are binary data; 'with' closes the file even when
            # unpickling fails halfway through
            with open(self.filename, 'rb') as infile:
                cached = pickle.load(infile)
            if not isinstance(cached, dict):
                raise ValueError("version cache does not hold a dict")
            self.url2version = cached
        except Exception:
            # pickle.load can raise many unrelated exception types on bad
            # input, hence the broad (but not bare) except
            logger.debug("Cannot load version cache, restarting from scratch")
            self.url2version = {}
        logger.debug("loaded version cache with %d entries %s" % (
            len(self.url2version), list(self.url2version.keys())))

    def save(self):
        """Write the mapping to self.filename; failures are logged, not raised."""
        try:
            with open(self.filename, 'wb') as outfile:
                pickle.dump(self.url2version, outfile)
        except Exception:
            logger.log_exc("Cannot save version cache into %s" % self.filename)

    def clean(self):
        """Remove the cache file from disk; failures are logged, not raised.

        The in-memory mapping is left untouched.
        """
        try:
            # os.unlink returns None, there is no return code worth logging
            os.unlink(self.filename)
        except OSError:
            logger.info("Could not unlink version cache %s" % self.filename)
        else:
            logger.info("Cleaned up version cache %s" % self.filename)

    def show(self):
        """Print every entry, oldest first, flagging the outdated ones."""
        entries = len(self.url2version)
        print("version cache from file %s has %d entries" % (self.filename, entries))
        # each item is (url, (timestamp, version)); sort on the timestamp
        oldest_first = sorted(self.url2version.items(), key=lambda kv: kv[1][0])
        now = time.time()
        for (url, (timestamp, version)) in oldest_first:
            how_old = now - timestamp
            if how_old <= self.expires:
                print("%s -- %d seconds ago" % (url, how_old))
            else:
                print("OUTDATED %s (%d seconds ago, expires=%d)" % (
                    url, how_old, self.expires))

    # turns out we might have trailing slashes or not
    def normalize(self, url):
        """Return url with all leading and trailing '/' characters removed."""
        return url.strip("/")

    def set(self, url, version):
        """Record version for url, timestamped with the current time."""
        url = self.normalize(url)
        self.url2version[url] = (time.time(), version)

    def get(self, url):
        """Return the cached version for url, or None.

        None is returned when the url is unknown, when its entry is older
        than self.expires seconds, or when the entry is malformed.
        """
        url = self.normalize(url)
        try:
            (timestamp, version) = self.url2version[url]
            how_old = time.time() - timestamp
        except (KeyError, TypeError, ValueError):
            # unknown url, or an entry that is not a (timestamp, version) pair
            return None
        if how_old <= self.expires:
            return version
        return None
+