f24 only
[infrastructure.git] / scripts / clean-backupdb.py
index 45e5026..1d993c6 100755 (executable)
@@ -9,14 +9,18 @@
 # (*) recent stuff: unchanged
 # 
 
+import sys
 import os, os.path
-from datetime import datetime
+from datetime import datetime, timedelta
 from glob import glob
 import re
 import traceback
 
 from optparse import OptionParser
 
+now=datetime.now()  # reference time used for every age/barrier computation; main() shifts it by --offset days
+counter=0  # incremented by each File.cleanup() call; main() prints it in verbose mode
+
 class FileIgnored (Exception): pass
 
 # in days
@@ -57,8 +61,6 @@ def parse_filename (filename):
             return parsing_failed
 
 # one entry like this per file, managed in the Kind class
-now=datetime.now()
-
 class File:
 
     def __init__ (self, dir, filename, datetime, options):
@@ -69,19 +71,19 @@ class File:
         self.age=now-datetime
         self.weekday=self.datetime.weekday()
         if self.age.days<0:
-            if self.options.verbose: print 'Filename %s is from the future - skipped'%sfilename
+            if self.options.verbose: print 'Filename %s is from the future - skipped'%filename
             raise FileIgnored,"Filename from the future %s"%filename
         self.group = self._group_string()
 
     def __repr__ (self):
-        return "%s (%s) -- weekday %s"%(self.filename,self.datetime,self.datetime.weekday())
+        return "%s (%s) -- weekday %s"%(self.path(),self.datetime,self.datetime.weekday())  # normalized dir/filename path, timestamp, weekday number
+
+    def path (self):  # location of this file: self.dir joined with self.filename, then normalized
+        return os.path.normpath(os.path.join(self.dir,self.filename))  # stays relative when self.dir is relative
 
     def age_days (self):
         return self.age.days
 
-    def age_weeks (self):
-        return self.age.days/7
-
     # oldest first
     @staticmethod
     def sort_age (file1, file2):
@@ -105,23 +107,31 @@ class File:
         elif w1!=PREFERRED_WEEKDAY and w2==PREFERRED_WEEKDAY:
             return 1
     
-    month_marrier=None
+    month_barrier=None
+    week_barrier=None
     @staticmethod
-    def compute_month_barrier ():
-        if not File.month_marrier: 
-            # find the exact datetime for a month change 
-            # KEEP_FILES_IN_MONTH_AFTER months ago +
-            year=now.year
-            month=now.month
-            day=1
-            if now.month>=KEEP_FILES_IN_MONTH_AFTER+1:
-                month -= KEEP_FILES_IN_MONTH_AFTER
-            else:
-                year -= 1
-                month += (12-KEEP_FILES_IN_MONTH_AFTER)
-            File.month_marrier = datetime (year=year, month=month, day=day)
-                
-        return File.month_marrier
+    def _compute_barriers ():  # fill in File.month_barrier and File.week_barrier once; later calls return early
+        if File.month_barrier:
+            return
+        # month barrier: midnight on the 1st of the month that lies
+        # KEEP_FILES_IN_MONTH_AFTER months before 'now'
+        year=now.year
+        month=now.month
+        day=1
+        if now.month>=KEEP_FILES_IN_MONTH_AFTER+1:
+            month -= KEEP_FILES_IN_MONTH_AFTER
+        else:  # going back that many months crosses into the previous year
+            year -= 1
+            month += (12-KEEP_FILES_IN_MONTH_AFTER)  # NOTE(review): month can drop below 1 if KEEP_FILES_IN_MONTH_AFTER > 12 -- constant is defined outside this hunk, confirm
+        File.month_barrier = datetime (year=year, month=month, day=day)
+        # week barrier: first Monday (00:00) on or after the month barrier
+        remaining_days=(7-File.month_barrier.weekday())%7  # weekday() is 0 on Monday, so a Monday barrier yields 0
+        File.week_barrier=File.month_barrier+timedelta(days=remaining_days)
+
+    @staticmethod
+    def compute_month_barrier(): File._compute_barriers(); return File.month_barrier  # files dated at or before this datetime are grouped per month (%Y%m)
+    @staticmethod
+    def compute_week_barrier(): File._compute_barriers(); return File.week_barrier  # origin from which the week number of newer files is counted
 
     # returns a key for grouping files, the cleanup then
     # preserving one entry in the set of files with same group
@@ -134,20 +144,42 @@ class File:
         if self.datetime <= File.compute_month_barrier():
             return self.datetime.strftime("%Y%m")
         else:
-            return "week%d"%self.age_weeks()
+            weeks=(self.datetime-File.compute_week_barrier()).days/7
+            weeks += 1
+            return "week%02d"%weeks
+
+    def cleanup (self, preserved):  # discard this file (unlink, or move it when --move-to is set); 'preserved' is the File kept for the group
+        global counter  # module-level tally, printed by main() in verbose mode
+        counter+=1
+        src = self.path() if os.path.isabs(self.dir) else os.path.abspath(os.path.basename(self.path()))  # an absolute dir must not be re-resolved against whatever cwd the last chdir left
+        if self.options.destination:
+            dst = os.path.join(os.path.abspath(self.options.destination), os.path.basename(self.path()))
+            if self.options.verbose:
+                print "moving %s\n\tto %s"%(self.path(), dst)
+            if not self.options.dry_run:
+                os.rename (src, dst)  # NOTE(review): os.rename can fail when src and dst are on different filesystems
+        else:
+            if self.options.verbose:
+                print "Would cleanup %s"%(src)
+                print "    (keeping %s)"%preserved.path()
+            if not self.options.dry_run:
+                if self.options.verbose: print "unlink",src
+                os.unlink (src)
 
 # all files in a given timeslot (either month or week)
 class Group:
     def __init__ (self, groupname):
         self.groupname=groupname
         self.files=[]
+        self.count = 0  # number of files passed to File.cleanup() by keep_one()
     def insert (self, file):
         self.files.append(file)
     def epilogue (self):
         self.files.sort (File.sort_relevance)
-#        print 20*'*','after sort'
-#        for file in self.files:
-#            print "%s"%file
+    def keep_one (self):  # keep self.files[0] and clean up the rest; assumes epilogue() has already sorted self.files
+        for file in self.files[1:]:
+            file.cleanup(self.files[0])  # the kept file is passed along for the verbose report
+            self.count += 1
 
 # all files with the same (prefix, suffix)
 class Kind:
@@ -156,6 +188,9 @@ class Kind:
         self.prefix=prefix
         self.suffix=suffix
         self.options=options
+        self.todelete = 0
+        self.oldest = None
+        self.newest = None
         # will contain tuples (filename, datetime)
         self.list = []
 
@@ -163,6 +198,9 @@ class Kind:
     def add_file (self, dir, filename, datetime):
         try:
             self.list.append ( File (dir, filename, datetime, self.options) )
+            self.newest = datetime
+            if not self.oldest:
+                self.oldest = datetime
         except FileIgnored: pass
         except:
             print 'could not append %s'%filename
@@ -183,6 +221,7 @@ class Kind:
             group.epilogue()
 
     def show (self):
+        if not self.options.verbose: return
         print 30*'-',"%s-<date>.%s"%(self.prefix,self.suffix)
         entries=len(self.list)
         print " %d entries" % entries,
@@ -191,7 +230,7 @@ class Kind:
             print " << %s - %s d old"%(f.filename, f.age_days()),
         if entries >=2:
             f=self.list[-1]
-            print ">> %s - %s d old"%(f.filename, f.age_days())
+            print "|| %s - %s d old >>"%(f.filename, f.age_days())
         groupnames=self.groups.keys()
         groupnames.sort()
         groupnames.reverse()
@@ -204,7 +243,7 @@ class Kind:
                     print "    %s"%file
         elif self.options.verbose:
             print " Found %d groups"%len(groupnames),
-            for g in groupnames: print "%s->%d"%(k,len(self.groups[g].files)),
+            for g in groupnames: print "%s->%d"%(g,len(self.groups[g].files)),
             print ''
 
     # sort on number of entries
@@ -212,6 +251,14 @@ class Kind:
     def sort_size (k1, k2):
         return len(k1.list)-len(k2.list)
 
+    def cleanup (self):  # run Group.keep_one() on every group of this kind and total the discarded files in self.todelete
+        groupnames=self.groups.keys()
+        groupnames.sort()  # process groups in name order
+        for groupname in groupnames:
+            if self.options.extra_verbose: print 'GROUP',groupname
+            self.groups[groupname].keep_one()
+            self.todelete += self.groups[groupname].count  # NOTE(review): Group.count is cumulative, so calling cleanup() twice would over-count
+
 # keeps an index of all files found, index by (prefix, suffix), then sorted by time
 class Index:
     def __init__ (self,options):
@@ -240,11 +287,21 @@ class Index:
         for filename in filenames:
             (b,p,s,d) = parse_filename (filename)
             if not b:
-                print "Filename %s does not match - skipped"%filename
+               if self.options.verbose:
+                    print "Filename %s does not match - skipped"%filename
                 continue
             self.insert (dir, filename, p, s, d)
 
-def handle_arg (index, dir, pattern):
+    def cleanup (self):  # run Kind.cleanup() on every kind held in self.index
+        for kind in self.index.values():
+            kind.cleanup()
+            
+    def summary (self):  # print a table with one row per kind: entry count, number discarded, oldest and newest timestamps
+        print "%-30s%-10s%3s  %5s %30s%30s"%("Prefix","Suffix","Num","(Del)","Oldest","Newest")  # same column widths as the rows below
+        for kind in self.index.values():
+            print "%-30s%-10s%3s  (%3s) %30s%30s"%(kind.prefix, kind.suffix, len(kind.list), kind.todelete, kind.oldest, kind.newest)
+
+def handle_dir_pattern (index, dir, pattern):
     try:
         os.chdir(dir)
     except:
@@ -254,29 +311,61 @@ def handle_arg (index, dir, pattern):
     index.insert_many (dir, filenames)
 
 def main ():
-    parser=OptionParser()
+    usage="Usage: %prog [options] dir_or_files"
+    parser=OptionParser(usage=usage)
     parser.add_option ("-v","--verbose",dest='verbose',action='store_true',default=False,
                        help="run in verbose mode")
     parser.add_option ("-x","--extra-verbose",dest='extra_verbose',action='store_true',default=False,
                        help="run in extra verbose mode")
     parser.add_option ("-n","--dry-run",dest='dry_run',action='store_true',default=False,
                        help="dry run")
+    parser.add_option ("-m","--move-to",dest='destination',action='store',type='string',default=False,
+                       help="move to <destination> instead of removing the file")
+    parser.add_option ("-o","--offset",dest='offset',action='store',type='int',default=0,
+                       help="pretend we run <offset> days in the future")
+    parser.add_option ("-s","--summary",dest='summary',action='store_true',default=False,
+                       help="print a summary")
     (options, args) = parser.parse_args()
     if options.extra_verbose: options.verbose=True
+    try:
+        if options.offset !=0:
+            global now
+            now += timedelta(days=options.offset)
+    except:
+        traceback.print_exc()
+        print "Offset not understood %s - expect an int. number of days"%options.offset
+        sys.exit(1)
     
+    if options.destination and not os.path.isdir(options.destination):
+        print "Destination should be a directory"
+        sys.exit(1)
+
+    if len(args) ==0:
+        parser.print_help()
+        sys.exit(1)
+
     # args can be directories, or patterns, like 
     # main /db-backup /db-backup-f8/*bz2
     # in any case we handle each arg completely separately
-    index = Index (options)
+    dir_patterns=[]
     for arg in args:
         if os.path.isdir (arg):
-            handle_arg (index, arg, "*")
+            if arg=='.': arg=os.getcwd()
+            dir_patterns.append ( (arg, '*',) )
         else:
             (dir,pattern)=os.path.split(arg)
-            if not dir: dir='.'
-            handle_arg (index, dir, pattern)
+            if not dir: dir=os.getcwd()
+            dir_patterns.append ( (dir, pattern,) )
+
+    index = Index (options)
+    for (dir, pattern) in dir_patterns: handle_dir_pattern (index, dir, pattern)
     index.epilogue()
     index.show()
+    index.cleanup()
+    if (options.summary) :
+        index.summary()
+    if options.verbose:
+        print 'Found %d entries to unlink'%counter
             
 if __name__ == '__main__':
     main()