X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=scripts%2Fclean-backupdb.py;h=1d993c6dea312804db32ff7181d59fbee6d59fdf;hb=a8aa1baef6f4d1df60f44d1d542912acff913226;hp=106ba94a97aec9c36e44560ae652a1dde183d6ab;hpb=fe75d10e9a6eaa1d40681178c0f5d742cee71464;p=infrastructure.git diff --git a/scripts/clean-backupdb.py b/scripts/clean-backupdb.py index 106ba94..1d993c6 100755 --- a/scripts/clean-backupdb.py +++ b/scripts/clean-backupdb.py @@ -18,6 +18,9 @@ import traceback from optparse import OptionParser +now=datetime.now() +counter=0 + class FileIgnored (Exception): pass # in days @@ -58,8 +61,6 @@ def parse_filename (filename): return parsing_failed # one entry like this per file, managed in the Kind class -now=datetime.now() - class File: def __init__ (self, dir, filename, datetime, options): @@ -70,12 +71,15 @@ class File: self.age=now-datetime self.weekday=self.datetime.weekday() if self.age.days<0: - if self.options.verbose: print 'Filename %s is from the future - skipped'%sfilename + if self.options.verbose: print 'Filename %s is from the future - skipped'%filename raise FileIgnored,"Filename from the future %s"%filename self.group = self._group_string() def __repr__ (self): - return "%s (%s) -- weekday %s"%(self.filename,self.datetime,self.datetime.weekday()) + return "%s (%s) -- weekday %s"%(self.path(),self.datetime,self.datetime.weekday()) + + def path (self): + return os.path.normpath(os.path.join(self.dir,self.filename)) def age_days (self): return self.age.days @@ -144,18 +148,38 @@ class File: weeks += 1 return "week%02d"%weeks + def cleanup (self, preserved): + global counter + counter+=1 + src = os.path.abspath(os.path.basename(self.path())); + if self.options.destination: + dst = os.path.abspath(self.options.destination) + '/' + os.path.basename(self.path()) + if self.options.verbose: + print "moving %s\n\tto %s"%(self.path(), dst) + if not self.options.dry_run: + os.rename (src, dst) + else: + if self.options.verbose: + print "Would cleanup %s"%(src) + print " (keeping %s)"%preserved.path() + if not self.options.dry_run: + if self.options.verbose: print "unlink",src + os.unlink (src) + # all files in a given timeslot (either month or week) class Group: def __init__ (self, groupname): self.groupname=groupname self.files=[] + self.count = 0 def insert (self, file): self.files.append(file) def epilogue (self): self.files.sort (File.sort_relevance) -# print 20*'*','after sort' -# for file in self.files: -# print "%s"%file + def keep_one (self): + for file in self.files[1:]: + file.cleanup(self.files[0]) + self.count += 1 # all files with the same (prefix, suffix) class Kind: @@ -164,6 +188,9 @@ class Kind: self.prefix=prefix self.suffix=suffix self.options=options + self.todelete = 0 + self.oldest = None + self.newest = None # will contain tuples (filename, datetime) self.list = [] @@ -171,6 +198,9 @@ class Kind: def add_file (self, dir, filename, datetime): try: self.list.append ( File (dir, filename, datetime, self.options) ) + self.newest = datetime + if not self.oldest: + self.oldest = datetime except FileIgnored: pass except: print 'could not append %s'%filename @@ -191,6 +221,7 @@ class Kind: group.epilogue() def show (self): + if not self.options.verbose: return print 30*'-',"%s-.%s"%(self.prefix,self.suffix) entries=len(self.list) print " %d entries" % entries, @@ -220,6 +251,14 @@ class Kind: def sort_size (k1, k2): return len(k1.list)-len(k2.list) + def cleanup (self): + groupnames=self.groups.keys() + groupnames.sort() + for groupname in groupnames: + if self.options.extra_verbose: print 'GROUP',groupname + self.groups[groupname].keep_one() + self.todelete += self.groups[groupname].count + # keeps an index of all files found, index by (prefix, suffix), then sorted by time class Index: def __init__ (self,options): @@ -248,11 +287,21 @@ class Index: for filename in filenames: (b,p,s,d) = parse_filename (filename) if not b: - print "Filename %s does not match - skipped"%filename + if self.options.verbose: + print "Filename %s does not match - skipped"%filename continue self.insert (dir, filename, p, s, d) -def handle_arg (index, dir, pattern): + def cleanup (self): + for kind in self.index.values(): + kind.cleanup() + + def summary (self): + print "%-30s%-10s%10s%25s%25s"%("Prefix","Suffix","Num (Del)","Oldest","Newest") + for kind in self.index.values(): + print "%-30s%-10s%3s (%3s) %30s%30s"%(kind.prefix, kind.suffix, len(kind.list), kind.todelete, kind.oldest, kind.newest) + +def handle_dir_pattern (index, dir, pattern): try: os.chdir(dir) except: @@ -262,20 +311,23 @@ def handle_arg (index, dir, pattern): index.insert_many (dir, filenames) def main (): - parser=OptionParser() + usage="Usage: %prog [options] dir_or_files" + parser=OptionParser(usage=usage) parser.add_option ("-v","--verbose",dest='verbose',action='store_true',default=False, help="run in verbose mode") parser.add_option ("-x","--extra-verbose",dest='extra_verbose',action='store_true',default=False, help="run in extra verbose mode") parser.add_option ("-n","--dry-run",dest='dry_run',action='store_true',default=False, help="dry run") + parser.add_option ("-m","--move-to",dest='destination',action='store',type='string',default=False, + help="move to instead of removing the file") parser.add_option ("-o","--offset",dest='offset',action='store',type='int',default=0, help="pretend we run days in the future") + parser.add_option ("-s","--summary",dest='summary',action='store_true',default=False, + help="print a summary") (options, args) = parser.parse_args() if options.extra_verbose: options.verbose=True try: - #options.offset=int(options.offset) - print 'offset=%d'%options.offset if options.offset !=0: global now now += timedelta(days=options.offset) @@ -284,19 +336,36 @@ def main (): print "Offset not understood %s - expect an int. number of days"%options.offset sys.exit(1) + if options.destination and not os.path.isdir(options.destination): + print "Destination should be a directory" + sys.exit(1) + + if len(args) ==0: + parser.print_help() + sys.exit(1) + # args can be directories, or patterns, like # main /db-backup /db-backup-f8/*bz2 # in any case we handle each arg completely separately - index = Index (options) + dir_patterns=[] for arg in args: if os.path.isdir (arg): - handle_arg (index, arg, "*") + if arg=='.': arg=os.getcwd() + dir_patterns.append ( (arg, '*',) ) else: (dir,pattern)=os.path.split(arg) - if not dir: dir='.' - handle_arg (index, dir, pattern) + if not dir: dir=os.getcwd() + dir_patterns.append ( (dir, pattern,) ) + + index = Index (options) + for (dir, pattern) in dir_patterns: handle_dir_pattern (index, dir, pattern) index.epilogue() index.show() + index.cleanup() + if (options.summary) : + index.summary() + if options.verbose: + print 'Found %d entries to unlink'%counter if __name__ == '__main__': main()