summaryrefslogtreecommitdiff
path: root/reposync.py
blob: 51dbe82c5952bd0af32e270aaf640d99a92a58fe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/env python3
# vim: ai ts=4 sts=4 sw=4

"""
Cron-job that syncs remote repositories when necessary.

NOTE! Considered doing threading, but the cvs import subprocesses
      need correct working directory. In the future, we could do this
      multi-process if needed. For now, just run single-threaded.
"""

import configparser
import os
import shlex
import shutil
import sys

import psycopg2

from util.LockFile import LockFile


class RepoSync(object):
    """Synchronize every approved repository that has a remote configured.

    db is a DB-API connection (cursor()/commit() are used) and conf is the
    parsed settings object that is handed on to the sync method.
    """

    def __init__(self, db, conf):
        self.db = db
        self.conf = conf

    def sync(self):
        """Sync each approved remote repository and record the sync time.

        Repositories are processed in name order. After a repository has
        been synced and finalized, its lastsynced column is set to the
        current timestamp and the transaction is committed, so progress is
        stored one repository at a time. Any exception raised by a sync
        method propagates and stops the run.
        """
        curs = self.db.cursor()
        curs.execute("""
SELECT id,name,repotype_id,remoteurl,remotemodule,lastsynced
FROM repositories
INNER JOIN remoterepositories ON repositories.remoterepository_id=remoterepositories.id
WHERE approved ORDER BY name
""")
        for repo_id, name, repotype, remoteurl, remotemodule, _lastsynced in curs:
            # The name is interpolated into a filesystem path, so reject any
            # name containing a slash. A membership test is used because
            # str.find() returns 0 for a leading slash, which a "> 0"
            # comparison would let through.
            if '/' in name:
                print("Subdirectories not supported when synchronizing")
                continue
            method = SyncMethod.get(repotype)
            method.init(self.conf, name, remoteurl, remotemodule)
            method.sync()
            method.finalize()
            update = self.db.cursor()
            update.execute(
                "UPDATE remoterepositories SET lastsynced=CURRENT_TIMESTAMP WHERE id=%s",
                (repo_id, ))
            self.db.commit()


class SyncMethod(object):
    """Base class for the per-repository-type synchronization methods.

    Subclasses provide initialsync() and normalsync(). The expected call
    sequence is get() to obtain an instance, then init(), sync() and
    finalize().
    """

    @classmethod
    def get(cls, repotype):
        """Return a new sync method instance for the given repository type.

        Raises Exception when no implementation exists for repotype.
        """
        if repotype == "cvs":
            return SyncMethodCvs()
        if repotype == "rsynccvs":
            return SyncMethodRsyncCvs()
        if repotype == "git":
            return SyncMethodGit()
        raise Exception("No implementation for repository type %s found" % repotype)

    def __init__(self):
        self.name = self.remoteurl = self.remotemodule = None

    def init(self, conf, name, remoteurl, remotemodule):
        """Store the repository parameters and export GIT_DIR.

        The local repository path is <githome>/repos/<name>.git, where
        githome is read from the "paths" section of conf. GIT_DIR is set in
        this process's environment, so it is inherited by the commands run
        through system().
        """
        self.conf = conf
        self.name = name
        self.remoteurl = remoteurl
        self.remotemodule = remotemodule
        self.repopath = "%s/repos/%s.git" % (self.conf.get("paths", "githome"), self.name)

        os.environ['GIT_DIR'] = self.repopath

    def sync(self):
        """Run initialsync() if the local repository directory does not
        exist yet, otherwise normalsync()."""
        if os.path.isdir(self.repopath):
            self.normalsync()
        else:
            self.initialsync()

    def initialsync(self):
        """Create the local repository; must be overridden."""
        raise NotImplementedError("sync method not implemented")

    def normalsync(self):
        """Update an existing local repository; must be overridden."""
        raise NotImplementedError("sync method not implemented")

    def finalize(self):
        """Run "git update-server-info" inside the repository and clean up.

        The previous working directory is restored and GIT_DIR is removed
        from the environment even when the command fails, so a failure does
        not leave the process pointing at this repository.
        """
        savedir = os.getcwd()
        try:
            os.chdir(self.repopath)
            self.system("git update-server-info")
        finally:
            # Drop GIT_DIR first so it is cleared even if restoring the
            # working directory raises.
            os.environ.pop('GIT_DIR', None)
            os.chdir(savedir)

    def system(self, cmd):
        """Run cmd through os.system() and return 0 on success.

        Raises Exception when os.system() reports a non-zero status.
        """
        r = os.system(cmd)
        if r != 0:
            raise Exception("Failed to execute \"%s\": %s" % (cmd, r))
        return 0


class SyncMethodCvs(SyncMethod):
    """Synchronize using "git cvsimport".

    It deals with remote CVS repositories but apparently does not deal
    with branches very well.
    """

    def initialsync(self):
        """Import the CVS module for the first time and make the result bare."""
        # git cvsimport is evil. The first time, we need to let it create
        # a non-bare repository. Otherwise it creates one inside the bare one.
        # pop() instead of del so a missing variable is not an error.
        os.environ.pop('GIT_DIR', None)
        self.normalsync()
        # Now turn this into a bare repository: move everything out of the
        # .git subdirectory into the repository root, then drop the
        # (now empty) .git directory.
        dotgit = "%s/.git" % self.repopath
        for entry in os.listdir(dotgit):
            shutil.move(
                "%s/%s" % (dotgit, entry),
                "%s/%s" % (self.repopath, entry)
            )
        os.rmdir(dotgit)

    def normalsync(self):
        """Run git cvsimport from the remote CVS root into the local repo.

        Raises Exception (from system()) when the command fails.
        """
        # The values come from the database and are passed to a shell, so
        # quote each one to keep whitespace or shell metacharacters from
        # being interpreted.
        self.system("git cvsimport -v -d %s -r master -C %s -i -k %s" % (
            # CVS url
            shlex.quote(self.remoteurl),
            # New repo
            shlex.quote(self.repopath),
            # cvs module
            shlex.quote(self.remotemodule),
        ))


class SyncMethodRsyncCvs(SyncMethod):
    """Rsync a cvs repository, and then use fromcvs to convert it to git.

    This is really only used for the main repository.
    """

    def initialsync(self):
        """Not supported; the initial setup has to be done manually."""
        # We really only use this for the main repo, so way too lazy to set
        # this up now. Do it manually ;-)
        raise NotImplementedError("Sorry, initial sync for rsync-cvs not implemented")

    def normalsync(self):
        """Rsync the CVS repository, convert it with togit.rb and repack.

        Raises Exception (from system()) when any of the external commands
        fails. The working directory in effect on entry is restored whether
        or not the conversion succeeds.
        """
        githome = self.conf.get("paths", "githome")
        rsyncpath = "%s/rsyncsrc/%s" % (githome, self.name)

        # First, rsync the cvs repository. Arguments are shell-quoted since
        # the command line is handed to a shell.
        self.system("rsync -azCH --delete %s %s" % (
            shlex.quote(self.remoteurl),
            shlex.quote(rsyncpath),
        ))

        # If an authormap exists for this repository, copy it over now. The
        # rsync process will remove it again, so we need to redo this after
        # each time we rsync.
        authormap = "%s/authormap/%s" % (githome, self.name)
        if os.path.isfile(authormap):
            shutil.copyfile(authormap, "%s/CVSROOT/authormap" % rsyncpath)

        # Now perform Git Import Magic (TM)
        savedir = os.getcwd()
        try:
            # togit.rb is invoked by relative name, so run it from the
            # fromcvs directory.
            os.chdir("%s/sw/fromcvs" % githome)

            # Perform Magic!
            self.system("ruby togit.rb %s %s %s" % (
                shlex.quote(rsyncpath),
                shlex.quote(self.remotemodule),
                shlex.quote(self.repopath),
            ))

            # Repack changes
            os.chdir(self.repopath)
            self.system("git repack -f -d")
        finally:
            # Restore working dir, also when one of the commands failed
            os.chdir(savedir)


class SyncMethodGit(SyncMethod):
    """Sync with a remote git repository."""

    def initialsync(self):
        """Create the local repository as a bare clone of the remote.

        Raises Exception (from system()) when the clone fails.
        """
        # Both values end up on a shell command line, so quote them.
        self.system("git clone --no-checkout --bare %s %s" % (
            shlex.quote(self.remoteurl),
            shlex.quote(self.repopath),
        ))

    def normalsync(self):
        """Fetch the remote master branch into the local master branch.

        The fetch runs from inside the repository directory with GIT_DIR
        removed from the environment. The previous working directory is
        restored whether or not the fetch succeeds. Raises Exception (from
        system()) when the fetch fails.
        """
        savedir = os.getcwd()
        try:
            os.chdir(self.repopath)
            # pop() rather than del, so an already-missing variable is fine.
            os.environ.pop('GIT_DIR', None)
            self.system("git fetch -u %s master:master" % shlex.quote(self.remoteurl))
        finally:
            os.chdir(savedir)


if __name__ == "__main__":
    # Settings are read from pggit.settings, resolved relative to the
    # current working directory.
    settings = configparser.ConfigParser()
    settings.read("pggit.settings")

    # The lock object stays bound to a name for the rest of the run.
    # NOTE(review): presumably LockFile prevents concurrent runs and
    # releases on destruction -- confirm against util.LockFile.
    lock = LockFile(
        "%s/repos/.reposync_interlock" % settings.get("paths", "githome"))

    # The "db" option of the [database] section is passed to psycopg2 as is.
    connection = psycopg2.connect(settings.get('database', 'db'))
    syncer = RepoSync(connection, settings)
    syncer.sync()