#!/usr/bin/env python3
# vim: ai ts=4 sts=4 sw=4

"""
Cron-job that syncs remote repositories when necessary.

NOTE! Considered doing threading, but the cvs import subprocesses
need correct working directory. In the future, we could do this
multi-process if needed. For now, just run single-threaded.
"""

import sys
import os
import shutil
import subprocess
import psycopg2
import configparser

from util.LockFile import LockFile


class RepoSync:
    """Synchronize every approved remote repository listed in the database."""

    def __init__(self, db, conf):
        """Store the database connection and the parsed configuration.

        db   -- open database connection (created with psycopg2.connect()
                by the script entry point)
        conf -- configparser.ConfigParser holding at least [paths] githome
        """
        self.db = db
        self.conf = conf

    def sync(self):
        """Sync each approved repository and record the time of the sync.

        Repositories whose name contains a '/' are skipped with a message.
        Any exception raised by a sync method propagates to the caller and
        aborts the run; the lastsynced timestamp is only updated (and
        committed) for repositories that completed successfully.
        """
        curs = self.db.cursor()
        curs.execute("""
SELECT id,name,repotype_id,remoteurl,remotemodule,lastsynced
FROM repositories
INNER JOIN remoterepositories ON repositories.remoterepository_id=remoterepositories.id
WHERE approved ORDER BY name
""")
        for repo_id, name, repotype, remoteurl, remotemodule, _lastsynced in curs:
            # The name becomes a single path component below <githome>/repos,
            # so a slash anywhere in it (including the first character) is
            # not supported.
            if '/' in name:
                print("Subdirectories not supported when synchronizing")
                continue

            method = SyncMethod.get(repotype)
            method.init(self.conf, name, remoteurl, remotemodule)
            method.sync()
            method.finalize()

            # Use a second cursor for the update so the result set that is
            # being iterated is not disturbed.
            c2 = self.db.cursor()
            c2.execute(
                "UPDATE remoterepositories SET lastsynced=CURRENT_TIMESTAMP WHERE id=%s",
                (repo_id, ))
            self.db.commit()


class SyncMethod:
    """Base class for the per-repository-type synchronization strategies.

    Typical use: obtain an instance with get(), then call init(), sync()
    and finalize() in that order. Subclasses implement initialsync() and
    normalsync().
    """

    @classmethod
    def get(cls, repotype):
        """Return a new sync method instance for the given repository type.

        Raises Exception if no implementation exists for repotype.
        """
        # Looked up lazily because the subclasses are defined further down.
        implementations = {
            "cvs": SyncMethodCvs,
            "rsynccvs": SyncMethodRsyncCvs,
            "git": SyncMethodGit,
        }
        implementation = implementations.get(repotype)
        if implementation is None:
            raise Exception("No implementation for repository type %s found" % repotype)
        return implementation()

    def __init__(self):
        self.name = self.remoteurl = self.remotemodule = None

    def init(self, conf, name, remoteurl, remotemodule):
        """Bind this instance to one repository.

        Computes the local repository path as <githome>/repos/<name>.git and
        exports it as GIT_DIR in the process environment, so that git
        commands started afterwards operate on that repository.
        """
        self.conf = conf
        self.name = name
        self.remoteurl = remoteurl
        self.remotemodule = remotemodule
        self.repopath = "%s/repos/%s.git" % (self.conf.get("paths", "githome"), self.name)

        os.environ['GIT_DIR'] = self.repopath

    def sync(self):
        """Run initialsync() if the local repository directory does not exist
        yet, otherwise normalsync()."""
        if not os.path.isdir(self.repopath):
            self.initialsync()
        else:
            self.normalsync()

    def initialsync(self):
        """Create the local repository. Must be overridden by subclasses."""
        raise NotImplementedError("sync method not implemented")

    def normalsync(self):
        """Update an existing local repository. Must be overridden."""
        raise NotImplementedError("sync method not implemented")

    def finalize(self):
        """Run "git update-server-info" in the repository and drop GIT_DIR.

        GIT_DIR is removed from the environment even if the command fails,
        so a failure does not leak the setting into later commands.
        """
        try:
            self.system(["git", "update-server-info"], cwd=self.repopath)
        finally:
            # Some sync methods have already removed it; that is fine.
            os.environ.pop('GIT_DIR', None)

    def system(self, cmd, cwd=None):
        """Run a command and raise an exception if it fails.

        cmd -- preferably a list of arguments, which is executed directly
               without a shell so that URLs and paths containing spaces or
               shell metacharacters are passed through untouched. A plain
               string is still accepted and is run through the shell.
        cwd -- optional working directory for the command; the working
               directory of this process is never changed.

        Returns 0 on success. Raises Exception if the command cannot be
        started or exits with a non-zero status.
        """
        if isinstance(cmd, str):
            use_shell = True
            display = cmd
        else:
            use_shell = False
            # str() mirrors what the old "%s" formatting did with
            # non-string values coming from the database.
            cmd = [str(part) for part in cmd]
            display = " ".join(cmd)

        try:
            r = subprocess.call(cmd, shell=use_shell, cwd=cwd)
        except OSError as e:
            raise Exception("Failed to execute \"%s\": %s" % (display, e)) from e
        if r != 0:
            raise Exception("Failed to execute \"%s\": %s" % (display, r))
        return 0


class SyncMethodCvs(SyncMethod):
    # Synchronize using "git cvsimport", which deals with remote CVS
    # repositories but apparently does not deal with branches very well.

    def initialsync(self):
        """Import the CVS module for the first time and make the result bare."""
        # git cvsimport is evil. The first time, we need to let it create
        # a non-bare repository. Otherwise it creates one inside the bare one
        os.environ.pop('GIT_DIR', None)
        self.normalsync()

        # Now turn this into a bare repository by moving the contents of
        # .git up one level and removing the then-empty .git directory.
        dotgit = "%s/.git" % self.repopath
        for entry in os.listdir(dotgit):
            shutil.move(
                "%s/%s" % (dotgit, entry),
                "%s/%s" % (self.repopath, entry)
            )
        os.rmdir(dotgit)

    def normalsync(self):
        """Run "git cvsimport" from the remote CVS module into the repository."""
        # Not initial sync, so just do a sync
        self.system([
            "git", "cvsimport", "-v",
            "-d", self.remoteurl,      # CVS url
            "-r", "master",
            "-C", self.repopath,       # New repo
            "-i", "-k",
            self.remotemodule,         # cvs module
        ])


class SyncMethodRsyncCvs(SyncMethod):
    # Rsync a cvs repository, and then use fromcvs to convert it to git.
    # This is really only used for the main repository

    def initialsync(self):
        """Not supported; the initial setup has to be done manually."""
        # We really only use this for the main repo, so way too lazy to set
        # this up now. Do it manually ;-)
        raise NotImplementedError("Sorry, initial sync for rsync-cvs not implemented")

    def normalsync(self):
        """Rsync the CVS repository, convert it with fromcvs and repack."""
        githome = self.conf.get("paths", "githome")
        rsyncpath = "%s/rsyncsrc/%s" % (githome, self.name)

        # First, rsync the cvs repository
        self.system(["rsync", "-azCH", "--delete", self.remoteurl, rsyncpath])

        # If an authormap exists for this repository, copy it over now. The
        # rsync process will remove it again, so we need to redo this after
        # each time we rsync.
        authormap = "%s/authormap/%s" % (githome, self.name)
        if os.path.isfile(authormap):
            shutil.copyfile(authormap, "%s/CVSROOT/authormap" % rsyncpath)

        # Now perform Git Import Magic (TM). togit.rb is run from inside the
        # fromcvs directory.
        self.system(
            ["ruby", "togit.rb", rsyncpath, self.remotemodule, self.repopath],
            cwd="%s/sw/fromcvs" % githome)

        # Repack changes
        self.system(["git", "repack", "-f", "-d"], cwd=self.repopath)


class SyncMethodGit(SyncMethod):
    # Sync with a remote git repository.

    def initialsync(self):
        """Create the local repository as a bare clone of the remote."""
        self.system([
            "git", "clone", "--no-checkout", "--bare",
            self.remoteurl,
            self.repopath,
        ])

    def normalsync(self):
        """Fetch the remote master branch into the local master branch."""
        # The fetch is run from inside the repository directory with GIT_DIR
        # unset, as before.
        os.environ.pop('GIT_DIR', None)
        self.system(
            ["git", "fetch", "-u", self.remoteurl, "master:master"],
            cwd=self.repopath)


def main():
    """Read pggit.settings, take the interlock and sync all repositories."""
    c = configparser.ConfigParser()
    c.read("pggit.settings")
    # Keep a reference to the lock for the whole run; it is never used
    # directly.
    lock = LockFile("%s/repos/.reposync_interlock" % c.get("paths", "githome"))  # noqa: F841
    db = psycopg2.connect(c.get('database', 'db'))
    try:
        RepoSync(db, c).sync()
    finally:
        db.close()


if __name__ == "__main__":
    main()