3 ### a utility to filter backups in the /db-backup area
5 # lists all files that match <prefix>-<date>-<time>.<suffix>
6 # and preserves only the following
7 # (*) more than 2 months old : one file per month
# (*) more than LEAVE_FILES_YOUNGER_THAN days old : one file per week
9 # (*) recent stuff: unchanged
14 from datetime import datetime, timedelta
19 from optparse import OptionParser
24 class FileIgnored (Exception): pass
27 LEAVE_FILES_YOUNGER_THAN = 20
28 # keep that amount of (plain) months organized in weeks
29 # should not exceed 11
30 KEEP_FILES_IN_MONTH_AFTER = 2
33 # we prefer saturday as this is the end of the week
37 ## utility to parse a filename
# Building blocks of the filename regex.  Raw strings are used so that the
# regex escapes (\w, \-, \., \A, \Z) reach the re module untouched instead of
# relying on Python passing unknown string escapes through.
# timestamp part of a filename: 8 digits, a dash, 4 digits
datetime_pattern = r"(?P<datetime>[0-9]{8}\-[0-9]{4})"
prefix_pattern = r"(?P<prefix>[\w\-]+)"
suffix_pattern = r"(?P<suffix>[\w\.]+)"
# whole-name match of <prefix>, a '-' or '_' separator, <datetime>, '.', <suffix>
filename_matcher = re.compile(
    r"\A%s[-_]%s\.%s\Z" % (prefix_pattern, datetime_pattern, suffix_pattern))
# parse results are 4-tuples (bool, prefix, suffix, datetime);
# this is the value used when parsing fails
parsing_failed = (False, None, None, None)
49 def parse_filename (filename):
50 match=filename_matcher.match(filename)
56 dt = datetime.strptime(d['datetime'],"%Y%m%d-%H%M")
57 return (True, d['prefix'], d['suffix'], dt)
59 print "failed to parse timestamp %s from %s"%(d['datetime'],filename)
63 # one entry like this per file, managed in the Kind class
66 def __init__ (self, dir, filename, datetime, options):
68 self.filename=filename
69 self.datetime=datetime
72 self.weekday=self.datetime.weekday()
74 if self.options.verbose: print 'Filename %s is from the future - skipped'%filename
75 raise FileIgnored,"Filename from the future %s"%filename
76 self.group = self._group_string()
79 return "%s (%s) -- weekday %s"%(self.path(),self.datetime,self.datetime.weekday())
82 return os.path.normpath(os.path.join(self.dir,self.filename))
89 def sort_age (file1, file2):
90 # for 2.7, seems safer
92 return int((file1.datetime-file2.datetime).total_seconds())
93 # otherwise resort to days, we should be clear as our backups are daily
95 return int((file1.datetime-file2.datetime).days)
98 def sort_relevance (file1, file2):
101 if w1==PREFERRED_WEEKDAY and w2==PREFERRED_WEEKDAY:
102 return File.sort_age (file1,file2)
103 elif w1!=PREFERRED_WEEKDAY and w2!=PREFERRED_WEEKDAY:
104 return File.sort_age (file1,file2)
105 elif w1==PREFERRED_WEEKDAY and w2!=PREFERRED_WEEKDAY:
107 elif w1!=PREFERRED_WEEKDAY and w2==PREFERRED_WEEKDAY:
113 def _compute_barriers ():
114 if File.month_barrier:
116 # find the exact datetime for a month change
117 # KEEP_FILES_IN_MONTH_AFTER months ago +
121 if now.month>=KEEP_FILES_IN_MONTH_AFTER+1:
122 month -= KEEP_FILES_IN_MONTH_AFTER
125 month += (12-KEEP_FILES_IN_MONTH_AFTER)
126 File.month_barrier = datetime (year=year, month=month, day=day)
127 # find the next monday morning
128 remaining_days=(7-File.month_barrier.weekday())%7
129 File.week_barrier=File.month_barrier+timedelta(days=remaining_days)
def compute_month_barrier():
    """Run File._compute_barriers(), then return File.month_barrier."""
    File._compute_barriers()
    return File.month_barrier
def compute_week_barrier():
    """Run File._compute_barriers(), then return File.week_barrier."""
    File._compute_barriers()
    return File.week_barrier
136 # returns a key for grouping files, the cleanup then
137 # preserving one entry in the set of files with same group
138 def _group_string (self):
139 if self.age.days<=LEAVE_FILES_YOUNGER_THAN:
140 if self.options.verbose: print 'Filename %s is recent (%d d) - skipped'%\
141 (self.filename,LEAVE_FILES_YOUNGER_THAN)
142 raise FileIgnored,"Filename %s is recent"%self.filename
144 if self.datetime <= File.compute_month_barrier():
145 return self.datetime.strftime("%Y%m")
147 weeks=(self.datetime-File.compute_week_barrier()).days/7
149 return "week%02d"%weeks
151 def cleanup (self, preserved):
154 src = os.path.abspath(os.path.basename(self.path()));
155 if self.options.destination:
156 dst = os.path.abspath(self.options.destination) + '/' + os.path.basename(self.path())
157 if self.options.verbose:
158 print "moving %s\n\tto %s"%(self.path(), dst)
159 if not self.options.dry_run:
162 if self.options.verbose:
163 print "Would cleanup %s"%(src)
164 print " (keeping %s)"%preserved.path()
165 if not self.options.dry_run:
166 if self.options.verbose: print "unlink",src
169 # all files in a given timeslot (either month or week)
171 def __init__ (self, groupname):
172 self.groupname=groupname
175 def insert (self, file):
176 self.files.append(file)
178 self.files.sort (File.sort_relevance)
180 for file in self.files[1:]:
181 file.cleanup(self.files[0])
184 # all files with the same (prefix, suffix)
187 def __init__ (self, prefix, suffix, options):
# will contain File instances, one per accepted file (appended by add_file)
197 # allow for basic checking to be done in File
198 def add_file (self, dir, filename, datetime):
200 self.list.append ( File (dir, filename, datetime, self.options) )
201 self.newest = datetime
203 self.oldest = datetime
204 except FileIgnored: pass
206 print 'could not append %s'%filename
207 traceback.print_exc()
211 # sort self.list according to file age, oldest first
212 self.list.sort(File.sort_age)
213 # prepare groups according to age
215 for file in self.list:
217 if groupname not in self.groups:
218 self.groups[groupname]=Group(groupname)
219 self.groups[groupname].insert(file)
220 for group in self.groups.values():
224 if not self.options.verbose: return
225 print 30*'-',"%s-<date>.%s"%(self.prefix,self.suffix)
226 entries=len(self.list)
227 print " %d entries" % entries,
230 print " << %s - %s d old"%(f.filename, f.age_days()),
233 print "|| %s - %s d old >>"%(f.filename, f.age_days())
234 groupnames=self.groups.keys()
237 if self.options.extra_verbose:
238 print " Found %d groups"%len(groupnames)
241 files=self.groups[g].files
244 elif self.options.verbose:
245 print " Found %d groups"%len(groupnames),
246 for g in groupnames: print "%s->%d"%(g,len(self.groups[g].files)),
249 # sort on number of entries
def sort_size(k1, k2):
    """cmp-style comparator on the number of entries in each kind's list.

    Returns a negative number when k1 holds fewer entries than k2,
    zero when both hold the same number, a positive number otherwise.
    """
    size_1 = len(k1.list)
    size_2 = len(k2.list)
    return size_1 - size_2
255 groupnames=self.groups.keys()
257 for groupname in groupnames:
258 if self.options.extra_verbose: print 'GROUP',groupname
259 self.groups[groupname].keep_one()
260 self.todelete += self.groups[groupname].count
262 # keeps an index of all files found, index by (prefix, suffix), then sorted by time
264 def __init__ (self,options):
def insert(self, dir, filename, prefix, suffix, datetime):
    """Register one parsed file under its (prefix, suffix) kind.

    A Kind is created the first time a given (prefix, suffix) pair shows up;
    the file is then handed to that kind's add_file().
    """
    key = (prefix, suffix)
    try:
        kind = self.index[key]
    except KeyError:
        # first file of this kind: create its entry in the index
        kind = self.index[key] = Kind(prefix, suffix, self.options)
    kind.add_file(dir, filename, datetime)
274 # we're done inserting, do housecleaning
276 for (key, kind) in self.index.items():
280 # sort on number of entries
281 kinds = self.index.values()
282 kinds.sort (Kind.sort_size)
286 def insert_many (self, dir, filenames):
287 for filename in filenames:
288 (b,p,s,d) = parse_filename (filename)
290 if self.options.verbose:
291 print "Filename %s does not match - skipped"%filename
293 self.insert (dir, filename, p, s, d)
296 for kind in self.index.values():
300 print "%-30s%-10s%10s%25s%25s"%("Prefix","Suffix","Num (Del)","Oldest","Newest")
301 for kind in self.index.values():
302 print "%-30s%-10s%3s (%3s) %30s%30s"%(kind.prefix, kind.suffix, len(kind.list), kind.todelete, kind.oldest, kind.newest)
304 def handle_dir_pattern (index, dir, pattern):
308 print "Cannot chdir into %s - skipped"%dir
310 filenames=glob(pattern)
311 index.insert_many (dir, filenames)
314 usage="Usage: %prog [options] dir_or_files"
315 parser=OptionParser(usage=usage)
316 parser.add_option ("-v","--verbose",dest='verbose',action='store_true',default=False,
317 help="run in verbose mode")
318 parser.add_option ("-x","--extra-verbose",dest='extra_verbose',action='store_true',default=False,
319 help="run in extra verbose mode")
320 parser.add_option ("-n","--dry-run",dest='dry_run',action='store_true',default=False,
322 parser.add_option ("-m","--move-to",dest='destination',action='store',type='string',default=False,
323 help="move to <destination> instead of removing the file")
324 parser.add_option ("-o","--offset",dest='offset',action='store',type='int',default=0,
325 help="pretend we run <offset> days in the future")
326 parser.add_option ("-s","--summary",dest='summary',action='store_true',default=False,
327 help="print a summary")
328 (options, args) = parser.parse_args()
329 if options.extra_verbose: options.verbose=True
331 if options.offset !=0:
333 now += timedelta(days=options.offset)
335 traceback.print_exc()
336 print "Offset not understood %s - expect an int. number of days"%options.offset
339 if options.destination and not os.path.isdir(options.destination):
340 print "Destination should be a directory"
347 # args can be directories, or patterns, like
348 # main /db-backup /db-backup-f8/*bz2
349 # in any case we handle each arg completely separately
352 if os.path.isdir (arg):
353 if arg=='.': arg=os.getcwd()
354 dir_patterns.append ( (arg, '*',) )
356 (dir,pattern)=os.path.split(arg)
357 if not dir: dir=os.getcwd()
358 dir_patterns.append ( (dir, pattern,) )
360 index = Index (options)
361 for (dir, pattern) in dir_patterns: handle_dir_pattern (index, dir, pattern)
365 if (options.summary) :
368 print 'Found %d entries to unlink'%counter
370 if __name__ == '__main__':