""" Fast directory tree synchronization via cacheable STAT information You want to use this script if you often need to revert to a 'pristine' version of a large tree with many files. It's faster than using rsync, especially when using trees at network drives. Differences b/w different directory trees are managed through stored stat information (size, timestamp...). Stat info is optionally cached at DIRNAME.stat file. Use case - in order to get a clean SDK quickly: 1. install it as /foo/my_clean_sdk and 'cd /foo' 2. do 'treestat -c my_clean_sdk my_work_sdk' (-c stands for 'copy') 3. Work with my_work_sdk to your hearts content, deleting and creating files 4. To get back to clean SDK quickly, repeat point 2. If my_clean_sdk is on a slow network drive, you can speed up the process by caching the file information:: treestat -s my_clean_sdk This takes a 'snapshot' at my_clean_sdk.stat, which is used for copying if it exists. Using this script is noticeably quicker than deleting the work directory and copying over all the files every time (the 'old-fashioned way' of getting a fresh, virgin sdk), especially if there are only few changed files. """ import os,pprint,shutil,sys,gzip import cPickle as pickle from optparse import OptionParser import pickle import logging logging.basicConfig(format = "%(message)s") log = logging.getLogger("treestat") log.setLevel(logging.INFO) rootdir = "/tmp" class MODES: snap = 1 copy = 2 delta = 3 def statfilename(d): fname = os.path.join(os.path.dirname(d), os.path.basename(d)) + ".stat" return fname def takesnap(d, omitfile=True): stats = getstats(d) if not omitfile: statfile = statfilename(d) log.debug("Writing snapshot to " + statfile) f = gzip.open( statfile,"w" ) pickle.dump(stats,f) return stats class ProgressCounter: def __init__(self, total): self.started = time.time() self.total = total self.prevrep = None def setCurrent(self, cur): self.cur = cur def getReport(): rep = int(float(self.cur) / float(self.total) * 100) if rep == self.prevrep: return None timeest = ((time.time() - self.started()) * self.total)/self.cur def analyzetree(src, tgt): src, tgt = map(os.path.abspath, (src,tgt)) #log.info("copying "+src+" to " + tgt) srcsnap = read_snapshot(src) tgtsnap = read_snapshot(tgt) dels, copys = treediffs(tgtsnap,srcsnap) totalbytes = sum(srcsnap[f][0] for f in copys) log.info("-----------\nTo delete:\n" + " ".join(os.path.basename(f) for f in dels)) log.info("-----------\nTo copy:\n" + " ".join(os.path.basename(f) for f in copys)) log.info( "%d to delete (%d KiB); %d to copy (%d KiB) " % ( len(dels), sum(tgtsnap[f][0] for f in dels) / 1024, len(copys), totalbytes/1024)) def writethisline(s): linelen = 78 sys.stdout.write( "\b" * linelen + str(s)[:linelen] + " " * (linelen-len(s))) class Spinner: def __init__(self, chars="|/-\\"): self.val = 0 self.chars = chars def spin(self): sys.stdout.write('\b' + self.chars[self.val % len(self.chars)]) self.val+=1 def copytree(src,tgt): src, tgt = map(os.path.abspath, (src,tgt)) log.info("copying "+src+" to " + tgt) srcsnap = read_snapshot(src) tgtsnap = read_snapshot(tgt) dels, copys = treediffs(tgtsnap,srcsnap) print len(dels),"to delete;",len(copys),"to copy" bytescopied = 0 totalbytes = sum(srcsnap[f][0] for f in copys) os.chdir(tgt) for fname in [os.path.join(tgt,f) for f in dels]: log.debug("rm "+ fname) try: os.remove(fname) except OSError: print "Failed to remove",fname oldpercent = 0 percent = 0 for f in copys: sf = os.path.join(src,f) tf = os.path.join(tgt,f) log.debug("cp " + sf +" -> "+ tf) dname = os.path.dirname(tf) if not os.path.isdir(dname): log.debug("mkdir "+dname) os.makedirs(dname) writethisline('%d%% %s' % (percent, f)) shutil.copy2(os.path.abspath(sf),dname) bytescopied += srcsnap[f][0] percent = int(float(bytescopied) / float(totalbytes) * 100) #if percent != oldpercent: # print percent,"%" # oldpercent = percent def read_snapshot(d, refresh = 0): """ read snapshot, assert existence """ # special case: d is stat file name, not dirname if os.path.isfile(d) and d.lower().endswith(".stat"): log.info("Using %s directly", d) return pickle.load(gzip.open(d)) if not os.path.isdir(d): log.info("Creating dir %s", d) os.mkdir(d) sfile = statfilename(d) snap = None if not os.path.isfile(sfile): log.info("Commencing without statfile " + sfile) snap = takesnap(d) elif not refresh: log.info("Refresh of %s not wanted, used as-is." % sfile) snap = pickle.load(gzip.open(sfile)) else: log.info("Overwriting statfile %s" % sfile) snap = takesnap(d, omitfile=False) log.info("%s has %s files", d,len(snap) ) return snap def main(): parser = OptionParser() parser.add_option("-s", "--snap", dest="mode",action = "store_const", const=MODES.snap, help="Take a snapshot of the dir", metavar="FILE") parser.add_option("-c", "--copy", dest="mode",action = "store_const", const=MODES.copy, help="Copy a directory", metavar="FILE") parser.add_option("-d","--delta", dest="mode", action = "store_const", const=MODES.delta, help="Show delta (what WOULD be copied)", metavar="FILE") parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") (options, args) = parser.parse_args() if options.mode == MODES.snap: takesnap(os.path.abspath(args[0]), omitfile=False) elif options.mode == MODES.copy: copytree(args[0],args[1]) elif options.mode == MODES.delta: analyzetree(args[0],args[1]) def getstats(rootdir): statstructs = {} count = 0 os.chdir(rootdir) spinner=Spinner() for dp, dnames, fnames in os.walk("."): # don't want .\ dp = dp[2:] #print len(fnames),"files in",dp spinner.spin() for f in fnames: fname = os.path.join(dp,f) fullstat = os.stat(fname) st = (fullstat.st_size, fullstat.st_mtime) statstructs[fname.lower()] = st return statstructs def stats_differ(stat1, stat2): difference = cmp( stat1, stat2 ) #(stat1.st_size, stat1.st_mtime), #(stat2.st_size, stat2.st_mtime)) return difference def treediffs(ostat, nstat): deletes = [] copies = [] for f in ostat: if f not in nstat: deletes.append(f) else: if stats_differ(ostat[f], nstat[f]): #print "changed",f copies.append(f) for f in nstat: if f not in ostat: copies.append(f) return deletes, copies if __name__ == "__main__": main()