about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-06-30 16:59:25 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-06-30 16:59:25 -0400
commit 356b68bfd8a176ce5b36ed317fb24975fe2e6124 (patch)
tree b2eddffa862fb227f523b24de3f23f1a0dd54b2c
parent 1960b05f63774b500f311c40e6c3c6efd9208848 (diff)
download grokmirror-356b68bfd8a176ce5b36ed317fb24975fe2e6124.tar.gz
A bunch of fixes from testing
Replicating 20,000 repos from US to China is an excellent way to find a whole bunch of things that need to be fixed.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- grokmirror/__init__.py 21
-rwxr-xr-x grokmirror/fsck.py 24
-rwxr-xr-x grokmirror/manifest.py 51
-rwxr-xr-x grokmirror/pull.py 66
4 files changed, 123 insertions, 39 deletions
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py
index 4dccb82..8fef8c0 100644
--- a/grokmirror/__init__.py
+++ b/grokmirror/__init__.py
@@ -355,14 +355,14 @@ def list_repo_remotes(fullpath, withurl=False):
ecode, out, err = run_git_command(fullpath, args)
if not len(out):
logger.debug('Could not list remotes in %s', fullpath)
- return set()
+ return list()
if not withurl:
- return set(out.split('\n'))
+ return out.split('\n')
- remotes = set()
+ remotes = list()
for line in out.split('\n'):
- remotes.add(tuple(line.split()[:2]))
+ remotes.append(tuple(line.split()[:2]))
return remotes
@@ -383,11 +383,16 @@ def add_repo_to_objstore(obstrepo, fullpath):
def fetch_objstore_repo(obstrepo, fullpath=None):
+ my_remotes = list_repo_remotes(obstrepo)
if fullpath:
virtref = objstore_virtref(fullpath)
- remotes = {virtref}
+ if virtref in my_remotes:
+ remotes = {virtref}
+ else:
+ logger.debug('%s is not in remotes for %s', fullpath, obstrepo)
+ return False
else:
- remotes = list_repo_remotes(obstrepo)
+ remotes = my_remotes
success = True
for remote in remotes:
@@ -657,7 +662,7 @@ def read_manifest(manifile, wait=False):
else:
fh = open(manifile, 'rb')
- logger.info('Reading %s', manifile)
+ logger.debug('Reading %s', manifile)
jdata = fh.read().decode('utf-8')
fh.close()
@@ -679,7 +684,7 @@ def write_manifest(manifile, manifest, mtime=None, pretty=False):
import shutil
import gzip
- logger.info('Writing new %s', manifile)
+ logger.debug('Writing new %s', manifile)
(dirname, basename) = os.path.split(manifile)
(fd, tmpfile) = tempfile.mkstemp(prefix=basename, dir=dirname)
diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py
index d6eb8d8..3f56d17 100755
--- a/grokmirror/fsck.py
+++ b/grokmirror/fsck.py
@@ -142,7 +142,7 @@ def run_git_repack(fullpath, config, level=1, prune=True):
repack_flags.append('-a')
# We only prune if all repos pointing to us are public
- urls = grokmirror.list_repo_remotes(fullpath, withurl=True)
+ urls = set(grokmirror.list_repo_remotes(fullpath, withurl=True))
mine = set([x[1] for x in urls])
amap = grokmirror.get_altrepo_map(toplevel)
if mine != amap[fullpath]:
@@ -775,26 +775,30 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False,
args = ['remote', 'remove', virtref]
grokmirror.run_git_command(sibling, args)
continue
+ logger.info(' moving: %s', childpath)
success = grokmirror.add_repo_to_objstore(mdest, childpath)
if not success:
logger.critical('Could not add %s to %s', childpath, mdest)
continue
- logger.info(' fetching : %s', childpath)
+ logger.info(' : fetching into %s', os.path.basename(mdest))
success = grokmirror.fetch_objstore_repo(mdest, childpath)
if not success:
- logger.critical('Failed to migrate %s from %s to %s', childpath, os.path.basename(sibling),
+ logger.critical('Failed to fetch %s from %s to %s', childpath, os.path.basename(sibling),
os.path.basename(mdest))
continue
- logger.info(' migrating : %s', childpath)
+ logger.info(' : repointing alternates')
grokmirror.set_altrepo(childpath, mdest)
amap[sibling].remove(childpath)
amap[mdest].add(childpath)
args = ['remote', 'remove', virtref]
grokmirror.run_git_command(sibling, args)
- logger.info(' done : %s', childpath)
+ logger.info(' : done')
obst_changes = True
+ if mdest in status:
+ # Force full repack of merged obstrepos
+ status[mdest]['nextcheck'] = todayiso
# Not an else, because the previous step may have migrated things
if obstrepo not in amap or not len(amap[obstrepo]):
@@ -838,11 +842,11 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False,
}
nextcheck = datetime.datetime.strptime(status[obstrepo]['nextcheck'], '%Y-%m-%d')
- if nextcheck <= today:
- repack_level = 2
- else:
- obj_info = grokmirror.get_repo_obj_info(obstrepo)
- repack_level = get_repack_level(obj_info)
+ obj_info = grokmirror.get_repo_obj_info(obstrepo)
+ repack_level = get_repack_level(obj_info)
+ if repack_level > 1 and nextcheck > today:
+ # Don't do full repacks outside of schedule
+ repack_level = 1
if repack_level:
to_process.add((obstrepo, 'repack', repack_level))
diff --git a/grokmirror/manifest.py b/grokmirror/manifest.py
index 0cdce96..c221cef 100755
--- a/grokmirror/manifest.py
+++ b/grokmirror/manifest.py
@@ -37,6 +37,12 @@ def update_manifest(manifest, toplevel, fullpath, usenow):
sys.exit(1)
gitdir = '/' + os.path.relpath(fullpath, toplevel)
+ # Ignore it if it's an empty git repository
+ fp = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=True)
+ if not fp:
+ logger.info('%s has no heads, ignoring', gitdir)
+ return
+
if gitdir not in manifest:
# We didn't normalize paths to be always with a leading '/', so
# check the manifest for both and make sure we only save the path with a leading /
@@ -49,12 +55,6 @@ def update_manifest(manifest, toplevel, fullpath, usenow):
else:
logger.info('Updating %s in the manifest', gitdir)
- # Ignore it if it's an empty git repository
- fp = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=True)
- if not fp:
- logger.info('%s has no heads, ignoring', gitdir)
- return
-
description = None
try:
descfile = os.path.join(fullpath, 'description')
@@ -75,7 +75,13 @@ def update_manifest(manifest, toplevel, fullpath, usenow):
args = ['for-each-ref', '--sort=-committerdate', '--format=%(committerdate:iso-strict)', '--count=1']
ecode, out, err = grokmirror.run_git_command(fullpath, args)
if len(out):
- modified = datetime.datetime.fromisoformat(out)
+ try:
+ modified = datetime.datetime.fromisoformat(out)
+ except AttributeError:
+ # Python 3.6 doesn't have fromisoformat
+ # remove : from the TZ info
+ out = out[:-3] + out[-2:]
+ modified = datetime.datetime.strptime(out, '%Y-%m-%dT%H:%M:%S%z')
if not modified:
modified = datetime.datetime.now()
@@ -208,6 +214,9 @@ def parse_args():
help='When running with arguments, wait if manifest is not '
'there (can be useful when multiple writers are writing '
'the manifest)')
+ op.add_option('-o', '--fetch-objstore', dest='fetchobst',
+ action='store_true', default=False,
+ help='Fetch updates into objstore repo (if used)')
op.add_option('-v', '--verbose', dest='verbose', action='store_true',
default=False,
help='Be verbose and tell us what you are doing')
@@ -226,8 +235,9 @@ def parse_args():
def grok_manifest(manifile, toplevel, args=None, logfile=None, usenow=False,
check_export_ok=False, purge=False, remove=False,
- pretty=False, ignore=None, wait=False, verbose=False):
+ pretty=False, ignore=None, wait=False, verbose=False, fetchobst=False):
+ startt = datetime.datetime.now()
if args is None:
args = list()
if ignore is None:
@@ -305,6 +315,7 @@ def grok_manifest(manifile, toplevel, args=None, logfile=None, usenow=False,
symlinks = list()
# noinspection PyTypeChecker
run = em.counter(total=len(gitdirs), desc='Processing:', unit='repos', leave=False)
+ tofetch = set()
for gitdir in gitdirs:
run.update()
# check to make sure this gitdir is ok to export
@@ -324,16 +335,29 @@ def grok_manifest(manifile, toplevel, args=None, logfile=None, usenow=False,
symlinks.append(gitdir)
else:
update_manifest(manifest, toplevel, gitdir, usenow)
-
- logger.info('Updated %s records in %0.2fs', len(gitdirs), run.elapsed)
- run.close()
- em.stop()
+ if fetchobst:
+ # Do it after we're done with manifest, to avoid keeping it locked
+ tofetch.add(gitdir)
if len(symlinks):
set_symlinks(manifest, toplevel, symlinks)
grokmirror.write_manifest(manifile, manifest, pretty=pretty)
grokmirror.manifest_unlock(manifile)
+ run.close()
+ em.stop()
+
+ for gitdir in tofetch:
+ altrepo = grokmirror.get_altrepo(gitdir)
+ if altrepo and re.match(uuidre, altrepo, flags=re.I):
+ logger.info('Fetching objects into %s', os.path.basename(altrepo))
+ grokmirror.fetch_objstore_repo(altrepo, gitdir)
+
+ elapsed = datetime.datetime.now() - startt
+ if len(gitdirs) > 1:
+ logger.info('Updated %s records in %ds', len(gitdirs), elapsed.total_seconds())
+ else:
+ logger.info('Done in %0.2fs', elapsed.total_seconds())
def command():
@@ -343,7 +367,8 @@ def command():
opts.manifile, opts.toplevel, args=args, logfile=opts.logfile,
usenow=opts.usenow, check_export_ok=opts.check_export_ok,
purge=opts.purge, remove=opts.remove, pretty=opts.pretty,
- ignore=opts.ignore, wait=opts.wait, verbose=opts.verbose)
+ ignore=opts.ignore, wait=opts.wait, verbose=opts.verbose,
+ fetchobst=opts.fetchobst)
if __name__ == '__main__':
diff --git a/grokmirror/pull.py b/grokmirror/pull.py
index 52cf913..7a40748 100755
--- a/grokmirror/pull.py
+++ b/grokmirror/pull.py
@@ -116,7 +116,9 @@ def queue_worker(config, gitdir, repoinfo, action, obstrepo, is_private):
logger.debug('FP match, not pulling %s', gitdir)
if success:
- set_agefile(toplevel, gitdir, repoinfo.get('modified'))
+ modified = repoinfo.get('modified', None)
+ if modified is not None:
+ set_agefile(toplevel, gitdir, modified)
if my_fp is not None:
grokmirror.set_repo_fingerprint(toplevel, gitdir, fingerprint=my_fp)
@@ -384,7 +386,49 @@ def find_next_best_actions(toplevel, todo, obstdir, privmasks, forkgroups, mappi
obstrepo = os.path.join(obstdir, '%s.git' % forkgroup)
if not os.path.isdir(obstrepo):
- # No siblings matched an existing objstore repo, we are the first
+ # No siblings matched an existing objstore repo
+ # Do we have any existing siblings that were cloned without obstrepo?
+ # This would happen when an initial fork is created of an existing repo.
+ found = False
+ for s_fullpath in forkgroups[forkgroup]:
+ if os.path.isdir(s_fullpath):
+ is_private = False
+ s_gitdir = '/' + os.path.relpath(s_fullpath, toplevel)
+ for privmask in privmasks:
+ # Does this repo match privrepo
+ if fnmatch.fnmatch(s_gitdir, privmask):
+ is_private = True
+ break
+ if is_private:
+ # Can't use this one, as it's private
+ continue
+ # Make a new obstrepo and fetch it there
+ try:
+ # XXX: Need to deal with a situation if this repository is not known to us
+ # via manifests.
+ logger.debug('reusing existing %s as new obstrepo %s', s_gitdir, obstrepo)
+ obstrepo = grokmirror.setup_objstore_repo(obstdir, name=forkgroup)
+ # We want to prevent other siblings from being fetched until
+ # we have some objects available
+ pending_obstrepos.add(obstrepo)
+ logger.debug('locked obstrepo %s', obstrepo)
+ mapping[s_fullpath] = obstrepo
+ grokmirror.add_repo_to_objstore(obstrepo, s_fullpath)
+ grokmirror.set_altrepo(s_fullpath, obstrepo)
+ candidates.add((s_gitdir, 'objstore', obstrepo, False))
+ found = True
+ except IOError:
+ # External process is keeping it locked
+ logger.debug('cannot reuse %s until %s is available', s_gitdir, obstrepo)
+ # mark all siblings as ignore
+ for sf in forkgroups[forkgroup]:
+ ignore.add(sf)
+ break
+
+ if found:
+ continue
+
+ # No existing repos we can reuse, so let's start a new objstore repo.
# But wait, are we a private repo?
obstrepo = grokmirror.setup_objstore_repo(obstdir, name=forkgroup)
if is_private:
@@ -403,7 +447,7 @@ def find_next_best_actions(toplevel, todo, obstdir, privmasks, forkgroups, mappi
if not found:
# clone as a non-sibling repo, then
logger.debug('all siblings are private, so clone as individual repo')
- candidates.add((c_gitdir, 'clone', None, True))
+ candidates.add((c_gitdir, 'init', None, True))
continue
else:
# We'll add it when we come to it
@@ -647,8 +691,8 @@ def pull_mirror(config, verbose=False, force=False, nomtime=False,
r_forkgroups = dict()
for gitdir in set(r_culled.keys()):
e_cmp.update()
- if r_culled[gitdir]['fingerprint'] is None:
- logger.critical('Manifest files without fingeprints no longer supported.')
+ if not r_culled[gitdir].get('fingerprint', None):
+ logger.critical('Repos without fingerprint info (skipped): %s', gitdir)
continue
fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
@@ -797,12 +841,18 @@ def pull_mirror(config, verbose=False, force=False, nomtime=False,
candidates = find_next_best_actions(toplevel, todo, obstdir, privmasks, forkgroups,
mapping, maxactions=60-len(results))
for gitrepo, action, refrepo, is_private in candidates:
- if action == 'init':
+ if action in ('init', 'objstore'):
freshclones.add(gitrepo)
- todo.remove((gitrepo, action))
+ if action == 'objstore':
+ # Objstore actions aren't in the initial set, because we add them
+ # on the fly as we repurpose existing repos for objstore
+ repoinfo = l_manifest[gitrepo]
+ else:
+ repoinfo = r_culled[gitrepo]
+ todo.remove((gitrepo, action))
logger.info(' Queued: %s', gitrepo)
e_que.update()
- res = wpool.apply_async(queue_worker, (config, gitrepo, r_culled[gitrepo],
+ res = wpool.apply_async(queue_worker, (config, gitrepo, repoinfo,
action, refrepo, is_private))
results.append(res)
e_que.refresh()