#!/usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

# Seth Vidal - skvidal@linux.duke.edu
# (c) Duke University 2006

# copyfile - copy a file from one of a series of remote dirs if it has changed
# usage is: copyfile /localdisk/destfile [filename on remote machine]
#
# read config file (path taken from the COPYFILE_CONFIG env var, default
# /etc/copyfile.conf)
# configs:
#   basepath       - url of the default remote directory
#   grouppath      - url under which per-group directories live
#   group          - group name (overridden by the HOSTGROUP env var)
#   checksum       - 'md5' for md5, anything else selects sha1
#   filerepoindex  - name of the index file inside each remote directory
#
# determine src/dest of file
# look for the right file on the remote server, most specific first:
#   grouppath/group/hostname-src
#   grouppath/group/src
#   basepath/hostname-src
#   basepath/src
# md5/sha1 checksum the dest file
# compare to the checksum listed in the remote index
# if not same:
#   download src from server to a temp location
#   make a backup of dest
#   mv tempsrc onto dest
#
# exit status:
#   0 - the file was downloaded and moved into place
#   1 - the local file already matches the remote checksum, nothing done
#   2 - an error occurred
#
# Runs on Python 2.6+ and Python 3.

import hashlib
import os
import shutil
import socket
import string
import sys
import tempfile
import time
import types

from urlgrabber import grabber

# Exit statuses of the script (see the header comment).
EXIT_UPDATED = 0
EXIT_UNCHANGED = 1
EXIT_ERROR = 2

try:
    _STRING_TYPES = (basestring,)  # Python 2: both str and unicode paths
except NameError:
    _STRING_TYPES = (str,)  # Python 3


class SimpleConfigFile(object):
    """Simple config file object.

    Reads key=value pairs from a file and stores each one as an attribute
    of the instance.  Keys are lower-cased; keys and values are stripped of
    surrounding whitespace.  Blank lines and lines whose first
    non-whitespace character is '#' are ignored.

    Raises IOError/OSError if the file cannot be read and ValueError if a
    line is not of the form key=value.
    """

    def __init__(self, filename):
        self.fn = filename
        with open(filename, 'r') as fo:
            for line in fo:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                # Split on the first '=' only, so values such as urls with
                # a query string may themselves contain '='.
                key, sep, val = line.partition('=')
                if not sep:
                    raise ValueError('%s: expected key=value, got %r'
                                     % (filename, line))
                setattr(self, key.strip().lower(), val.strip())


class FileRepoIndex(object):
    """Object for accessing/parsing filerepo index files quickly.

    The index is fetched from '<url>/<idxname>'.  Each line is expected to
    be '<checksum> <filename>'; the result is kept in self.files as a
    filename -> checksum mapping.

    Raises grabber.URLGrabError if the index cannot be retrieved.
    """

    def __init__(self, url, idxname):
        self.files = {}  # fn = checksum
        self.url = url
        self.idxurl = '%s/%s' % (url, idxname)
        fo = grabber.urlopen(self.idxurl)
        try:
            for line in fo.readlines():
                if not isinstance(line, str):
                    # Python 3 hands back bytes from the network.
                    line = line.decode('utf-8')
                # Everything after the first space is the filename, so
                # names containing spaces survive intact.
                csum, _sep, fn = line.partition(' ')
                fn = fn.strip()
                if not fn:
                    # blank or malformed line: nothing to index
                    continue
                self.files[fn] = csum.strip()
        finally:
            fo.close()

    def hasfile(self, filename, hostname):
        """Return the name of the most specific index entry for filename.

        '<hostname>-<filename>' is preferred over plain '<filename>'.
        Returns None when neither is listed in the index.
        """
        for candidate in ('%s-%s' % (hostname, filename), filename):
            if candidate in self.files:
                return candidate
        return None

    def checksum(self, filename):
        """Return the checksum recorded for filename (KeyError if absent)."""
        return self.files[filename]


def error(msg):
    """Write msg, followed by a newline, to stderr."""
    sys.stderr.write('%s\n' % (msg,))


def retrievefile(url):
    """Download url into a fresh temp dir.

    Returns the path of the downloaded file, or None (after reporting the
    problem on stderr) if the download failed.
    """
    tmpdir = tempfile.mkdtemp()
    tmpfn = os.path.join(tmpdir, os.path.basename(url))
    try:
        # copy_local=1: for file:// urls urlgrabber would otherwise hand
        # back the path of the original file, which the caller would then
        # move out of the repository.
        loc = grabber.urlgrab(url, filename=tmpfn, copy_local=1)
    except grabber.URLGrabError as e:
        error('Error downloading %s: %s' % (url, e))
        # don't leave the now useless temp dir behind
        shutil.rmtree(tmpdir, ignore_errors=True)
        return None
    return loc


def backuplocal(fn):
    """Make a date-marked backup copy of fn next to it.

    The backup is named '<fn>.YYYY-MM-DD@HH:MM~'.  Returns True on success;
    on failure reports the problem on stderr and returns False.
    """
    ext = time.strftime("%Y-%m-%d@%H:%M~")
    backupdest = '%s.%s' % (fn, ext)
    try:
        shutil.copy2(fn, backupdest)
    except (EnvironmentError, shutil.Error) as e:
        # copy2 reports I/O problems as IOError/OSError, not shutil.Error
        error('Error making backup of %s to %s: %s' % (fn, backupdest, e))
        return False
    return True


def finalmove(src, dest):
    """Move src onto dest, creating dest's directory if need be.

    Returns True on success; on failure reports the problem on stderr and
    returns False.
    """
    dirn = os.path.dirname(dest)
    try:
        # dirn is '' when dest is a bare filename in the current directory
        if dirn and not os.path.exists(dirn):
            os.makedirs(dirn)
        shutil.move(src, dest)
    except (EnvironmentError, shutil.Error) as e:
        error('Error moving %s to %s: %s' % (src, dest, e))
        return False
    return True


def getChecksum(sumtype, file, CHUNK=2**16):
    """Return the hex digest of a file.

    sumtype = 'md5' for md5; any other value selects sha1
    file    = /path/to/file, or an already open file-like object whose
              read() returns bytes (it is not closed here)
    CHUNK   = read size in bytes, 65536 by default
    """
    if sumtype == 'md5':
        thissum = hashlib.md5()
    else:
        thissum = hashlib.sha1()

    opened_here = isinstance(file, _STRING_TYPES)
    if opened_here:
        # binary mode: the digest must cover the exact bytes on disk
        fo = open(file, 'rb', CHUNK)
    else:
        fo = file  # assume it's a file-like object

    try:
        chunk = fo.read(CHUNK)
        while chunk:
            thissum.update(chunk)
            chunk = fo.read(CHUNK)
    finally:
        if opened_here:
            fo.close()
    return thissum.hexdigest()


def _find_best(conf, src, hostname):
    """Locate src in the group index first, then in the basepath index.

    Returns (index entry name, checksum, download url), or
    (None, None, None) if no index lists the file.  Exits with EXIT_ERROR
    if an index cannot be retrieved.
    """
    baseurls = []
    if hasattr(conf, 'group'):
        baseurls.append('%s/%s' % (conf.grouppath, conf.group))
    baseurls.append(conf.basepath)

    for baseurl in baseurls:
        try:
            index = FileRepoIndex(baseurl, conf.filerepoindex)
        except grabber.URLGrabError as e:
            error('Could not retrieve file index from %s: %s' % (baseurl, e))
            sys.exit(EXIT_ERROR)
        best = index.hasfile(src, hostname)
        if best:
            return best, index.checksum(best), '%s/%s' % (baseurl, best)
    return None, None, None


def _discard_download(path):
    """Best-effort removal of a retrievefile() temp dir and its contents."""
    tmpdir = os.path.dirname(os.path.abspath(path))
    # Safety net: only ever delete a directory sitting directly inside the
    # system temp dir, which is where retrievefile() creates its downloads.
    if os.path.dirname(tmpdir) != os.path.abspath(tempfile.gettempdir()):
        return
    shutil.rmtree(tmpdir, ignore_errors=True)


def main(src, dest):
    """Bring dest up to date with the remote copy of src.

    Never returns: always leaves through sys.exit() with
      0 - dest was replaced by a freshly downloaded copy
      1 - dest already has the checksum listed in the remote index
      2 - an error occurred (details are written to stderr)
    """
    cfgfile = os.environ.get('COPYFILE_CONFIG', '/etc/copyfile.conf')
    # Errors are turned into an explicit exit status: an uncaught exception
    # would make python exit with 1, which here means "file unchanged".
    try:
        conf = SimpleConfigFile(cfgfile)
    except (EnvironmentError, ValueError) as e:
        error('Could not read config file %s: %s' % (cfgfile, e))
        sys.exit(EXIT_ERROR)

    # override group from env
    if 'HOSTGROUP' in os.environ:
        conf.group = os.environ['HOSTGROUP']

    required = ['basepath', 'filerepoindex', 'checksum']
    if hasattr(conf, 'group'):
        required.append('grouppath')
    missing = [key for key in required if not hasattr(conf, key)]
    if missing:
        error('Config file %s is missing: %s' % (cfgfile, ', '.join(missing)))
        sys.exit(EXIT_ERROR)

    loc_csum = '0'  # never equals a real digest, so a missing dest is fetched
    if os.path.exists(dest):
        try:
            loc_csum = getChecksum(conf.checksum, dest)
        except EnvironmentError as e:
            error('Could not checksum %s: %s' % (dest, e))
            sys.exit(EXIT_ERROR)

    me = socket.gethostname()

    # look for the best file to download/check
    best, bestcsum, besturl = _find_best(conf, src, me)

    # if still nothing then output an error and exit
    if not best:
        error('File not found in remote repository: %s' % src)
        sys.exit(EXIT_ERROR)

    # now we're in the homestretch
    if loc_csum == bestcsum:
        # all happy, we're done
        sys.exit(EXIT_UNCHANGED)

    locpath = retrievefile(besturl)
    if not locpath:
        error('File %s could not be downloaded and/or saved' % besturl)
        sys.exit(EXIT_ERROR)

    try:
        locpath_csum = getChecksum(conf.checksum, locpath)
        if locpath_csum != bestcsum:
            error('downloaded file %s and checksum from repo do not match, aborting' % best)
            sys.exit(EXIT_ERROR)

        if os.path.exists(dest):
            if not backuplocal(dest):
                error('Could not make backup file for %s' % dest)
                sys.exit(EXIT_ERROR)

        if not finalmove(locpath, dest):
            error('Could not move file %s into final place %s' % (src, dest))
            sys.exit(EXIT_ERROR)
    finally:
        # sys.exit() raises SystemExit, so this runs on every path above
        _discard_download(locpath)

    # we succeeded!
    sys.exit(EXIT_UPDATED)


if __name__ == '__main__':
    if len(sys.argv) not in (2, 3):
        error('usage: %s /localdisk/destfile [filename on remote machine]'
              % os.path.basename(sys.argv[0]))
        sys.exit(EXIT_ERROR)

    dest = sys.argv[1]
    if len(sys.argv) == 3:
        src = sys.argv[2]
    else:
        # the remote name defaults to the local file's basename
        src = os.path.basename(dest)

    main(src, dest)