3 ### a utility to filter backups in the /db-backup area
5 # lists all files that match <prefix>-<date>-<time>.<suffix>
6 # and preserves only the following
7 # (*) more than 2 months old : one file per month
8 # (*) more than 2 weeks old : one file per week
9 # (*) recent stuff: unchanged
13 from datetime import datetime
18 from optparse import OptionParser
class FileIgnored(Exception):
    """Signals that a file must be left out of the cleanup bookkeeping.

    Raised while building a File entry (file dated in the future, or too
    recent to be considered) and silently swallowed by Kind.add_file.
    """
# age threshold, in days: files whose age.days is at or below this value are
# left alone (File._group_string raises FileIgnored for them)
23 LEAVE_FILES_YOUNGER_THAN = 20
24 # number of calendar months, counted back from the current month, used to place the
# month barrier: files dated at or before the barrier are grouped per month, later
# ones per week.
25 # should not exceed 11 (the barrier arithmetic adds 12-KEEP_FILES_IN_MONTH_AFTER to the month)
26 KEEP_FILES_IN_MONTH_AFTER = 2
29 # we prefer saturday as this is the end of the week
## utility to parse a filename
# Filenames are expected to look like <prefix>-<date>-<time>.<suffix>
# (prefix and timestamp may also be separated by '_'), with the date written
# as YYYYMMDD and the time as HHMM, e.g. mydb-20120304-0506.sql.bz2
# Raw strings keep the regex escapes (\w, \-, \., \A, \Z) out of reach of
# python's own string-escape processing.
prefix_pattern = r"(?P<prefix>[\w\-]+)"
datetime_pattern = r"(?P<datetime>[0-9]{8}\-[0-9]{4})"
suffix_pattern = r"(?P<suffix>[\w\.]+)"
filename_matcher = re.compile(
    r"\A%s[-_]%s\.%s\Z" % (prefix_pattern, datetime_pattern, suffix_pattern))

# value returned by parse_filename when a filename cannot be used
parsing_failed = (False, None, None, None)


def parse_filename(filename):
    """Split a backup filename into its prefix, suffix and timestamp.

    Returns a tuple (bool, prefix, suffix, datetime):
      * (True, prefix, suffix, datetime-object) when the name matches the
        expected layout and its timestamp is a valid date/time;
      * parsing_failed, i.e. (False, None, None, None), otherwise.

    A name that matches the layout but carries an impossible timestamp
    (e.g. month 13) is reported on stdout before parsing_failed is returned.
    """
    match = filename_matcher.match(filename)
    if not match:
        return parsing_failed
    d = match.groupdict()
    try:
        dt = datetime.strptime(d['datetime'], "%Y%m%d-%H%M")
    except ValueError:
        # 8+4 digits that do not form a real date/time
        # single-argument print(...) behaves the same under python 2 and 3
        print("failed to parse timestamp %s from %s" % (d['datetime'], filename))
        return parsing_failed
    return (True, d['prefix'], d['suffix'], dt)
59 # one entry like this per file, managed in the Kind class
64 def __init__ (self, dir, filename, datetime, options):
66 self.filename=filename
67 self.datetime=datetime
70 self.weekday=self.datetime.weekday()
72 if self.options.verbose: print 'Filename %s is from the future - skipped'%sfilename
73 raise FileIgnored,"Filename from the future %s"%filename
74 self.group = self._group_string()
77 return "%s (%s) -- weekday %s"%(self.filename,self.datetime,self.datetime.weekday())
83 return self.age.days/7
def sort_age(file1, file2):
    """Comparator ordering two entries by timestamp, oldest first.

    Both arguments must expose a `datetime` attribute. Returns a negative,
    zero or positive integer when file1 is respectively older than, as old
    as, or more recent than file2.
    """
    delta = file1.datetime - file2.datetime
    try:
        # for 2.7, seems safer: full resolution down to the second
        return int(delta.total_seconds())
    except AttributeError:
        # timedelta.total_seconds() is missing before python 2.7:
        # otherwise resort to days, we should be clear as our backups are daily
        return int(delta.days)
# Comparator used to rank the files of one group (Group passes it to list.sort).
# w1 / w2 are assigned on lines not visible in this view -- presumably the weekdays
# of file1 / file2 (TODO confirm).
96 def sort_relevance (file1, file2):
# both files on the preferred weekday, or neither of them: plain age ordering decides
99 if w1==PREFERRED_WEEKDAY and w2==PREFERRED_WEEKDAY:
100 return File.sort_age (file1,file2)
101 elif w1!=PREFERRED_WEEKDAY and w2!=PREFERRED_WEEKDAY:
102 return File.sort_age (file1,file2)
# exactly one of the two files falls on the preferred weekday; the values returned
# for these two cases are on lines not visible in this view
103 elif w1==PREFERRED_WEEKDAY and w2!=PREFERRED_WEEKDAY:
105 elif w1!=PREFERRED_WEEKDAY and w2==PREFERRED_WEEKDAY:
# Return the datetime separating month-grouped files (at or before it) from
# week-grouped ones (after it); see _group_string.
# The value is computed on first use and cached on the class in File.month_marrier.
# NOTE(review): 'month_marrier' looks like a misspelling of 'month_barrier'; it is
# spelled the same way at every use visible here, so it works as is.
110 def compute_month_barrier ():
111 if not File.month_marrier:
112 # find the exact datetime for a month change
113 # KEEP_FILES_IN_MONTH_AFTER months ago +
# (the assignments of now / year / month / day are on lines not visible in this view)
# enough months have elapsed in the current year: step back within the same year
117 if now.month>=KEEP_FILES_IN_MONTH_AFTER+1:
118 month -= KEEP_FILES_IN_MONTH_AFTER
# otherwise wrap around the year boundary; any matching adjustment of 'year' is on
# lines not visible in this view
121 month += (12-KEEP_FILES_IN_MONTH_AFTER)
122 File.month_marrier = datetime (year=year, month=month, day=day)
124 return File.month_marrier
def _group_string(self):
    """Compute the key of the timeslot (group) this file belongs to.

    Files sharing a key end up in the same group; per the original note,
    the cleanup then preserves one entry among files with the same group.

    Returns:
        "YYYYMM" for a file dated at or before the month barrier,
        "week<N>" (N taken from age_weeks()) for a more recent one.

    Raises:
        FileIgnored: the file is at most LEAVE_FILES_YOUNGER_THAN days old
            and must be left untouched.
    """
    too_recent = self.age.days <= LEAVE_FILES_YOUNGER_THAN
    if too_recent:
        if self.options.verbose:
            print('Filename %s is recent (%d d) - skipped'
                  % (self.filename, LEAVE_FILES_YOUNGER_THAN))
        raise FileIgnored("Filename %s is recent" % self.filename)

    # past the barrier we are in the per-week area, before it in the per-month one
    if self.datetime > File.compute_month_barrier():
        return "week%d" % self.age_weeks()
    return self.datetime.strftime("%Y%m")
139 # all files in a given timeslot (either month or week)
# Create the group for one timeslot; groupname is the key under which Kind stores it.
# NOTE(review): self.files, which insert() appends to, is presumably initialised on a
# line not visible in this view.
141 def __init__ (self, groupname):
142 self.groupname=groupname
# Add one file to this group.
144 def insert (self, file):
145 self.files.append(file)
# Rank the group's files using File.sort_relevance as comparator (python2-style
# positional comparator argument to list.sort).
# NOTE(review): a line between the append and the sort is not visible in this view,
# so the sort may belong to a separate method.
147 self.files.sort (File.sort_relevance)
# debugging leftovers
148 # print 20*'*','after sort'
149 # for file in self.files:
152 # all files with the same (prefix, suffix)
# Set up the container for all files sharing one (prefix, suffix) pair.
# The attribute assignments are on lines not visible in this view; the other methods
# read self.prefix, self.suffix, self.options, self.list and self.groups.
155 def __init__ (self, prefix, suffix, options):
# NOTE(review): add_file() appends File objects, not tuples, to self.list -- the
# comment below looks stale
159 # will contain tuples (filename, datetime)
162 # allow for basic checking to be done in File
# Wrap the file in a File object and record it in self.list.
163 def add_file (self, dir, filename, datetime):
165 self.list.append ( File (dir, filename, datetime, self.options) )
# File() raises FileIgnored for files that must not be tracked (recent ones, or ones
# dated in the future): those are dropped silently
166 except FileIgnored: pass
# any other failure is reported together with a traceback
# (the matching 'try:' / 'except' lines are not visible in this view)
168 print 'could not append %s'%filename
169 traceback.print_exc()
173 # sort self.list according to file age, oldest first
174 self.list.sort(File.sort_age)
175 # prepare groups according to age
177 for file in self.list:
179 if groupname not in self.groups:
180 self.groups[groupname]=Group(groupname)
181 self.groups[groupname].insert(file)
182 for group in self.groups.values():
186 print 30*'-',"%s-<date>.%s"%(self.prefix,self.suffix)
187 entries=len(self.list)
188 print " %d entries" % entries,
191 print " << %s - %s d old"%(f.filename, f.age_days()),
194 print ">> %s - %s d old"%(f.filename, f.age_days())
195 groupnames=self.groups.keys()
198 if self.options.extra_verbose:
199 print " Found %d groups"%len(groupnames)
202 files=self.groups[g].files
205 elif self.options.verbose:
206 print " Found %d groups"%len(groupnames),
207 for g in groupnames: print "%s->%d"%(k,len(self.groups[g].files)),
210 # sort on number of entries
def sort_size(k1, k2):
    """Comparator ranking two kinds by their number of entries, fewest first.

    Returns the difference between the lengths of k1.list and k2.list:
    negative when k1 holds fewer files than k2, zero when they hold as many.
    """
    size1, size2 = len(k1.list), len(k2.list)
    return size1 - size2
215 # keeps an index of all files found, index by (prefix, suffix), then sorted by time
217 def __init__ (self,options):
def insert(self, dir, filename, prefix, suffix, datetime):
    """Register one parsed file under its (prefix, suffix) kind.

    The Kind for that pair is created, with this index's options, the first
    time the pair shows up; the file is then handed over to Kind.add_file.
    """
    key = (prefix, suffix)
    try:
        kind = self.index[key]
    except KeyError:
        # first file seen for this (prefix, suffix) pair
        kind = self.index[key] = Kind(prefix, suffix, self.options)
    kind.add_file(dir, filename, datetime)
227 # we're done inserting, do housecleaning
229 for (key, kind) in self.index.items():
233 # sort on number of entries
234 kinds = self.index.values()
235 kinds.sort (Kind.sort_size)
# Parse each filename and register the ones that follow the expected layout.
239 def insert_many (self, dir, filenames):
240 for filename in filenames:
# parse_filename returns (bool, prefix, suffix, datetime)
241 (b,p,s,d) = parse_filename (filename)
# message for names that do not parse; the test on 'b' selecting between this line and
# the insert below is on lines not visible in this view
243 print "Filename %s does not match - skipped"%filename
245 self.insert (dir, filename, p, s, d)
# Feed the index with the files of directory 'dir' that match the glob 'pattern'.
247 def handle_arg (index, dir, pattern):
# reported when 'dir' cannot be entered; the chdir attempt and its error handling are
# on lines not visible in this view
251 print "Cannot chdir into %s - skipped"%dir
# the pattern is expanded relative to the current working directory
253 filenames=glob(pattern)
254 index.insert_many (dir, filenames)
# Command-line entry: parse the options, then index every argument.
# NOTE(review): the enclosing 'def' line, the loop over args and the 'else' branch are
# on lines not visible in this view.
257 parser=OptionParser()
258 parser.add_option ("-v","--verbose",dest='verbose',action='store_true',default=False,
259 help="run in verbose mode")
260 parser.add_option ("-x","--extra-verbose",dest='extra_verbose',action='store_true',default=False,
261 help="run in extra verbose mode")
# (the continuation line closing this call is not visible in this view)
262 parser.add_option ("-n","--dry-run",dest='dry_run',action='store_true',default=False,
264 (options, args) = parser.parse_args()
# extra-verbose implies verbose
265 if options.extra_verbose: options.verbose=True
267 # args can be directories, or patterns, like
268 # main /db-backup /db-backup-f8/*bz2
269 # in any case we handle each arg completely separately
270 index = Index (options)
# a directory argument stands for all of its entries
272 if os.path.isdir (arg):
273 handle_arg (index, arg, "*")
# anything else is split into a directory part and a glob pattern
275 (dir,pattern)=os.path.split(arg)
277 handle_arg (index, dir, pattern)
281 if __name__ == '__main__':