# (*) recent stuff: unchanged
#
+import sys
import os, os.path
-from datetime import datetime
+from datetime import datetime, timedelta
from glob import glob
import re
import traceback
from optparse import OptionParser
+now=datetime.now()
+counter=0
+
class FileIgnored (Exception): pass
# in days
return parsing_failed
# one entry like this per file, managed in the Kind class
-now=datetime.now()
-
class File:
def __init__ (self, dir, filename, datetime, options):
self.age=now-datetime
self.weekday=self.datetime.weekday()
if self.age.days<0:
- if self.options.verbose: print 'Filename %s is from the future - skipped'%sfilename
+ if self.options.verbose: print 'Filename %s is from the future - skipped'%filename
raise FileIgnored,"Filename from the future %s"%filename
self.group = self._group_string()
def __repr__ (self):
- return "%s (%s) -- weekday %s"%(self.filename,self.datetime,self.datetime.weekday())
+ return "%s (%s) -- weekday %s"%(self.path(),self.datetime,self.datetime.weekday())
+
+    def path (self):
+        """Return self.dir joined with self.filename, normalized by os.path.normpath."""
+        joined = os.path.join(self.dir, self.filename)
+        return os.path.normpath(joined)
def age_days (self):
return self.age.days
- def age_weeks (self):
- return self.age.days/7
-
# oldest first
@staticmethod
def sort_age (file1, file2):
elif w1!=PREFERRED_WEEKDAY and w2==PREFERRED_WEEKDAY:
return 1
- month_marrier=None
+ month_barrier=None
+ week_barrier=None
@staticmethod
- def compute_month_barrier ():
- if not File.month_marrier:
- # find the exact datetime for a month change
- # KEEP_FILES_IN_MONTH_AFTER months ago +
- year=now.year
- month=now.month
- day=1
- if now.month>=KEEP_FILES_IN_MONTH_AFTER+1:
- month -= KEEP_FILES_IN_MONTH_AFTER
- else:
- year -= 1
- month += (12-KEEP_FILES_IN_MONTH_AFTER)
- File.month_marrier = datetime (year=year, month=month, day=day)
-
- return File.month_marrier
+    def _compute_barriers ():
+        """Compute File.month_barrier and File.week_barrier, once.
+
+        Does nothing when File.month_barrier is already set; otherwise both
+        class attributes are derived from the module-level 'now'.
+        """
+        if File.month_barrier:
+            return
+        # the month barrier is the first day of the month that lies
+        # KEEP_FILES_IN_MONTH_AFTER months before the current one;
+        # when that crosses a january, borrow 12 months from the previous year
+        if now.month>=KEEP_FILES_IN_MONTH_AFTER+1:
+            barrier_year = now.year
+            barrier_month = now.month - KEEP_FILES_IN_MONTH_AFTER
+        else:
+            barrier_year = now.year - 1
+            barrier_month = now.month + (12-KEEP_FILES_IN_MONTH_AFTER)
+        File.month_barrier = datetime (year=barrier_year, month=barrier_month, day=1)
+        # the week barrier is the first monday (weekday 0) at or after the month barrier
+        days_to_monday = (7-File.month_barrier.weekday())%7
+        File.week_barrier = File.month_barrier + timedelta(days=days_to_monday)
+
+    @staticmethod
+    def compute_month_barrier():
+        """Return File.month_barrier, computing both barriers first if needed."""
+        File._compute_barriers()
+        return File.month_barrier
+    @staticmethod
+    def compute_week_barrier():
+        """Return File.week_barrier, computing both barriers first if needed."""
+        File._compute_barriers()
+        return File.week_barrier
# returns a key for grouping files, the cleanup then
# preserving one entry in the set of files with same group
if self.datetime <= File.compute_month_barrier():
return self.datetime.strftime("%Y%m")
else:
- return "week%d"%self.age_weeks()
+ weeks=(self.datetime-File.compute_week_barrier()).days/7
+ weeks += 1
+ return "week%02d"%weeks
+
+    def cleanup (self, preserved):
+        """Dispose of this file, which is redundant with <preserved>.
+
+        The file is renamed into options.destination when that is set, and
+        unlinked otherwise; with options.dry_run nothing is touched on disk.
+        The module-level 'counter' is incremented on every call.
+
+        preserved: the File that is kept instead; only used for reporting.
+        """
+        global counter
+        counter+=1
+        path = self.path()
+        if os.path.isabs(path):
+            # use the recorded location: the current directory is whatever
+            # directory was entered last while indexing, which need not be
+            # the one this file lives in
+            src = path
+        else:
+            # a relative dir can no longer be resolved once the current
+            # directory has changed - keep resolving the bare filename
+            # against the current directory in that case
+            src = os.path.abspath(os.path.basename(path))
+        if self.options.destination:
+            dst = os.path.join(os.path.abspath(self.options.destination), os.path.basename(path))
+            if self.options.verbose:
+                print "moving %s\n\tto %s"%(path, dst)
+            if not self.options.dry_run:
+                os.rename (src, dst)
+        else:
+            if self.options.verbose:
+                print "Would cleanup %s"%(src)
+                print " (keeping %s)"%preserved.path()
+            if not self.options.dry_run:
+                if self.options.verbose: print "unlink",src
+                os.unlink (src)
# all files in a given timeslot (either month or week)
class Group:
def __init__ (self, groupname):
self.groupname=groupname
self.files=[]
+ self.count = 0
def insert (self, file):
self.files.append(file)
def epilogue (self):
self.files.sort (File.sort_relevance)
-# print 20*'*','after sort'
-# for file in self.files:
-# print "%s"%file
+    def keep_one (self):
+        """Clean up every file of the group but the first one.
+
+        The first entry of self.files is handed to each cleanup() call as the
+        file being preserved; self.count is incremented once per file cleaned up.
+        """
+        redundant = self.files[1:]
+        for entry in redundant:
+            entry.cleanup(self.files[0])
+            self.count += 1
# all files with the same (prefix, suffix)
class Kind:
self.prefix=prefix
self.suffix=suffix
self.options=options
+ self.todelete = 0
+ self.oldest = None
+ self.newest = None
# will contain tuples (filename, datetime)
self.list = []
def add_file (self, dir, filename, datetime):
try:
self.list.append ( File (dir, filename, datetime, self.options) )
+ self.newest = datetime
+ if not self.oldest:
+ self.oldest = datetime
except FileIgnored: pass
except:
print 'could not append %s'%filename
group.epilogue()
def show (self):
+ if not self.options.verbose: return
print 30*'-',"%s-<date>.%s"%(self.prefix,self.suffix)
entries=len(self.list)
print " %d entries" % entries,
print " << %s - %s d old"%(f.filename, f.age_days()),
if entries >=2:
f=self.list[-1]
- print ">> %s - %s d old"%(f.filename, f.age_days())
+ print "|| %s - %s d old >>"%(f.filename, f.age_days())
groupnames=self.groups.keys()
groupnames.sort()
groupnames.reverse()
print " %s"%file
elif self.options.verbose:
print " Found %d groups"%len(groupnames),
- for g in groupnames: print "%s->%d"%(k,len(self.groups[g].files)),
+ for g in groupnames: print "%s->%d"%(g,len(self.groups[g].files)),
print ''
# sort on number of entries
def sort_size (k1, k2):
return len(k1.list)-len(k2.list)
+    def cleanup (self):
+        """Run keep_one() on every group, in sorted groupname order.
+
+        Each group's count is added to self.todelete.
+        """
+        for groupname in sorted(self.groups.keys()):
+            group = self.groups[groupname]
+            if self.options.extra_verbose: print 'GROUP',groupname
+            group.keep_one()
+            self.todelete += group.count
+
# keeps an index of all files found, index by (prefix, suffix), then sorted by time
class Index:
def __init__ (self,options):
for filename in filenames:
(b,p,s,d) = parse_filename (filename)
if not b:
- print "Filename %s does not match - skipped"%filename
+ if self.options.verbose:
+ print "Filename %s does not match - skipped"%filename
continue
self.insert (dir, filename, p, s, d)
-def handle_arg (index, dir, pattern):
+    def cleanup (self):
+        """Call cleanup() on every Kind stored in self.index."""
+        kinds = self.index.values()
+        for kind in kinds:
+            kind.cleanup()
+
+    def summary (self):
+        """Print one table row per Kind: prefix, suffix, entries (to delete), oldest, newest."""
+        # header and rows must use the same column widths (30, 10, 10, 30, 30)
+        # so that the titles sit on top of their columns
+        header_format = "%-30s%-10s%10s%30s%30s"
+        row_format = "%-30s%-10s%3s (%3s) %30s%30s"
+        print header_format%("Prefix","Suffix","Num (Del)","Oldest","Newest")
+        for kind in self.index.values():
+            print row_format%(kind.prefix, kind.suffix, len(kind.list), kind.todelete, kind.oldest, kind.newest)
+
+def handle_dir_pattern (index, dir, pattern):
try:
os.chdir(dir)
except:
index.insert_many (dir, filenames)
def main ():
-    parser=OptionParser()
+    usage="Usage: %prog [options] dir_or_files"
+    parser=OptionParser(usage=usage)
    parser.add_option ("-v","--verbose",dest='verbose',action='store_true',default=False,
                       help="run in verbose mode")
    parser.add_option ("-x","--extra-verbose",dest='extra_verbose',action='store_true',default=False,
                       help="run in extra verbose mode")
    parser.add_option ("-n","--dry-run",dest='dry_run',action='store_true',default=False,
                       help="dry run")
+    parser.add_option ("-m","--move-to",dest='destination',action='store',type='string',default=None,
+                       help="move to <destination> instead of removing the file")
+    parser.add_option ("-o","--offset",dest='offset',action='store',type='int',default=0,
+                       help="pretend we run <offset> days in the future")
+    parser.add_option ("-s","--summary",dest='summary',action='store_true',default=False,
+                       help="print a summary")
    (options, args) = parser.parse_args()
    if options.extra_verbose: options.verbose=True
+    # optparse has already rejected a non-int offset; what can still fail
+    # here is an offset too large for timedelta / datetime arithmetic
+    try:
+        if options.offset !=0:
+            global now
+            now += timedelta(days=options.offset)
+    except OverflowError:
+        traceback.print_exc()
+        print "Offset not understood %s - expect an int. number of days"%options.offset
+        sys.exit(1)
+    if options.destination:
+        if not os.path.isdir(options.destination):
+            print "Destination should be a directory"
+            sys.exit(1)
+        # resolve it right away: handle_dir_pattern changes the current
+        # directory, so a relative destination would point elsewhere later on
+        options.destination = os.path.abspath(options.destination)
+
+    if len(args) ==0:
+        parser.print_help()
+        sys.exit(1)
+
    # args can be directories, or patterns, like
    # main /db-backup /db-backup-f8/*bz2
    # in any case we handle each arg completely separately
-    index = Index (options)
+    # all directories are made absolute before any of them is entered,
+    # for the same reason as the destination above
+    dir_patterns=[]
    for arg in args:
        if os.path.isdir (arg):
-            handle_arg (index, arg, "*")
+            dir_patterns.append ( (os.path.abspath(arg), '*',) )
        else:
            (dir,pattern)=os.path.split(arg)
-            if not dir: dir='.'
-            handle_arg (index, dir, pattern)
+            # abspath of an empty dir is the current directory
+            dir_patterns.append ( (os.path.abspath(dir), pattern,) )
+
+    index = Index (options)
+    for (dir, pattern) in dir_patterns: handle_dir_pattern (index, dir, pattern)
    index.epilogue()
    index.show()
+    index.cleanup()
+    if (options.summary) :
+        index.summary()
+    if options.verbose:
+        print 'Found %d entries to unlink'%counter
if __name__ == '__main__':
main()