+ return (url, parts[0], parts[1])
+
+# a very simple cache mechanism so that successive runs (see make)
+# will go *much* faster
+# assuming everything is sequential, as simple as it gets
+# { url -> (timestamp, version)}
+
+
class VersionCache:
    """Tiny on-disk cache mapping a URL to the version last recorded for it.

    Entries are stored in ``url2version`` as ``{url: (timestamp, version)}``
    where ``timestamp`` is the ``time.time()`` value at which the entry was
    recorded.  An entry older than ``expires`` seconds is treated as absent
    by ``get()``.  The mapping is persisted with ``pickle`` in ``filename``.

    No locking is performed: callers are assumed to use the cache
    sequentially.
    """

    # default expiration period is 1h
    def __init__(self, filename=None, expires=60 * 60):
        """Create the cache and immediately load any previously saved state.

        filename -- path of the pickle file; when None, the file
                    "sfascan-version-cache.pickle" located in the same
                    directory as argv[0] is used.
        expires  -- maximum age, in seconds, of a usable entry.
        """
        # default is to store cache in the same dir as argv[0]
        if filename is None:
            filename = os.path.join(os.path.dirname(sys.argv[0]),
                                    "sfascan-version-cache.pickle")
        self.filename = filename
        self.expires = expires
        self.url2version = {}
        self.load()

    def load(self):
        """Read the cache file into ``url2version``.

        Best effort: any failure (missing file, corrupt content, ...) is
        logged at debug level and leaves the cache empty.
        """
        try:
            # pickle streams are binary: the file must be opened in 'rb'
            # mode, otherwise loading always fails under Python 3.
            # NOTE(review): pickle can execute arbitrary code while loading;
            # the cache file must only ever come from a trusted location.
            with open(self.filename, 'rb') as infile:
                loaded = pickle.load(infile)
            # guard against a file that holds something else than our dict
            if not isinstance(loaded, dict):
                raise ValueError("unexpected content in version cache")
            self.url2version = loaded
        except Exception:
            logger.debug("Cannot load version cache, restarting from scratch")
            self.url2version = {}
        logger.debug("loaded version cache with %d entries %s" % (len(self.url2version),
                                                                  list(self.url2version.keys())))

    def save(self):
        """Write ``url2version`` to the cache file.

        Best effort: a failure is reported through ``logger.log_exc`` and
        is not propagated to the caller.
        """
        try:
            # binary mode is required by pickle.dump (see load())
            with open(self.filename, 'wb') as outfile:
                pickle.dump(self.url2version, outfile)
        except Exception:
            logger.log_exc("Cannot save version cache into %s" % self.filename)

    def clean(self):
        """Remove the cache file from disk; the outcome is only logged."""
        try:
            # os.unlink() returns None and signals failure by raising, so
            # there is no return code worth reporting
            os.unlink(self.filename)
        except OSError:
            logger.info("Could not unlink version cache %s" % self.filename)
        else:
            logger.info("Cleaned up version cache %s" % self.filename)

    def show(self):
        """Print every entry, oldest first, marking the expired ones."""
        entries = len(self.url2version)
        print("version cache from file %s has %d entries" %
              (self.filename, entries))
        # sort on the timestamp, i.e. first item of the (timestamp, version)
        # value; sorted() copes with both lists and dict views
        oldest_first = sorted(self.url2version.items(),
                              key=lambda key_value: key_value[1][0])
        now = time.time()
        for url, (timestamp, _version) in oldest_first:
            how_old = now - timestamp
            # each line is formatted as one string so that the output is
            # the same whether print is a function or a statement
            if how_old <= self.expires:
                print("%s -- %d seconds ago" % (url, how_old))
            else:
                print("OUTDATED %s (%d seconds ago, expires=%d)" %
                      (url, how_old, self.expires))

    # turns out we might have trailing slashes or not
    def normalize(self, url):
        """Return url without leading or trailing '/' characters."""
        return url.strip("/")

    def set(self, url, version):
        """Record version for url, stamped with the current time."""
        url = self.normalize(url)
        self.url2version[url] = (time.time(), version)

    def get(self, url):
        """Return the cached version for url, or None.

        None is returned when url is unknown, when its entry is older than
        ``expires`` seconds, or when the stored entry is malformed.
        """
        entry = self.url2version.get(self.normalize(url))
        if entry is None:
            return None
        try:
            timestamp, version = entry
            still_valid = time.time() - timestamp <= self.expires
        except (TypeError, ValueError):
            # malformed entry (not a (timestamp, version) pair): cache miss
            return None
        return version if still_valid else None