diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-06-30 16:59:25 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-06-30 16:59:25 -0400 |
commit | 356b68bfd8a176ce5b36ed317fb24975fe2e6124 (patch) | |
tree | b2eddffa862fb227f523b24de3f23f1a0dd54b2c | |
parent | 1960b05f63774b500f311c40e6c3c6efd9208848 (diff) | |
download | grokmirror-356b68bfd8a176ce5b36ed317fb24975fe2e6124.tar.gz |
A bunch of fixes from testing
Replicating 20,000 repos from US to China is an excellent way to find a
whole bunch of things that need to be fixed.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | grokmirror/__init__.py | 21 | ||||
-rwxr-xr-x | grokmirror/fsck.py | 24 | ||||
-rwxr-xr-x | grokmirror/manifest.py | 51 | ||||
-rwxr-xr-x | grokmirror/pull.py | 66 |
4 files changed, 123 insertions, 39 deletions
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py index 4dccb82..8fef8c0 100644 --- a/grokmirror/__init__.py +++ b/grokmirror/__init__.py @@ -355,14 +355,14 @@ def list_repo_remotes(fullpath, withurl=False): ecode, out, err = run_git_command(fullpath, args) if not len(out): logger.debug('Could not list remotes in %s', fullpath) - return set() + return list() if not withurl: - return set(out.split('\n')) + return out.split('\n') - remotes = set() + remotes = list() for line in out.split('\n'): - remotes.add(tuple(line.split()[:2])) + remotes.append(tuple(line.split()[:2])) return remotes @@ -383,11 +383,16 @@ def add_repo_to_objstore(obstrepo, fullpath): def fetch_objstore_repo(obstrepo, fullpath=None): + my_remotes = list_repo_remotes(obstrepo) if fullpath: virtref = objstore_virtref(fullpath) - remotes = {virtref} + if virtref in my_remotes: + remotes = {virtref} + else: + logger.debug('%s is not in remotes for %s', fullpath, obstrepo) + return False else: - remotes = list_repo_remotes(obstrepo) + remotes = my_remotes success = True for remote in remotes: @@ -657,7 +662,7 @@ def read_manifest(manifile, wait=False): else: fh = open(manifile, 'rb') - logger.info('Reading %s', manifile) + logger.debug('Reading %s', manifile) jdata = fh.read().decode('utf-8') fh.close() @@ -679,7 +684,7 @@ def write_manifest(manifile, manifest, mtime=None, pretty=False): import shutil import gzip - logger.info('Writing new %s', manifile) + logger.debug('Writing new %s', manifile) (dirname, basename) = os.path.split(manifile) (fd, tmpfile) = tempfile.mkstemp(prefix=basename, dir=dirname) diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py index d6eb8d8..3f56d17 100755 --- a/grokmirror/fsck.py +++ b/grokmirror/fsck.py @@ -142,7 +142,7 @@ def run_git_repack(fullpath, config, level=1, prune=True): repack_flags.append('-a') # We only prune if all repos pointing to us are public - urls = grokmirror.list_repo_remotes(fullpath, withurl=True) + urls = set(grokmirror.list_repo_remotes(fullpath, withurl=True)) mine = set([x[1] for x in urls]) amap = grokmirror.get_altrepo_map(toplevel) if mine != amap[fullpath]: @@ -775,26 +775,30 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False, args = ['remote', 'remove', virtref] grokmirror.run_git_command(sibling, args) continue + logger.info(' moving: %s', childpath) success = grokmirror.add_repo_to_objstore(mdest, childpath) if not success: logger.critical('Could not add %s to %s', childpath, mdest) continue - logger.info(' fetching : %s', childpath) + logger.info(' : fetching into %s', os.path.basename(mdest)) success = grokmirror.fetch_objstore_repo(mdest, childpath) if not success: - logger.critical('Failed to migrate %s from %s to %s', childpath, os.path.basename(sibling), + logger.critical('Failed to fetch %s from %s to %s', childpath, os.path.basename(sibling), os.path.basename(mdest)) continue - logger.info(' migrating : %s', childpath) + logger.info(' : repointing alternates') grokmirror.set_altrepo(childpath, mdest) amap[sibling].remove(childpath) amap[mdest].add(childpath) args = ['remote', 'remove', virtref] grokmirror.run_git_command(sibling, args) - logger.info(' done : %s', childpath) + logger.info(' : done') obst_changes = True + if mdest in status: + # Force full repack of merged obstrepos + status[mdest]['nextcheck'] = todayiso # Not an else, because the previous step may have migrated things if obstrepo not in amap or not len(amap[obstrepo]): @@ -838,11 +842,11 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False, } nextcheck = datetime.datetime.strptime(status[obstrepo]['nextcheck'], '%Y-%m-%d') - if nextcheck <= today: - repack_level = 2 - else: - obj_info = grokmirror.get_repo_obj_info(obstrepo) - repack_level = get_repack_level(obj_info) + obj_info = grokmirror.get_repo_obj_info(obstrepo) + repack_level = get_repack_level(obj_info) + if repack_level > 1 and nextcheck > today: + # Don't do full repacks outside of schedule + repack_level = 1 if repack_level: to_process.add((obstrepo, 'repack', repack_level)) diff --git a/grokmirror/manifest.py b/grokmirror/manifest.py index 0cdce96..c221cef 100755 --- a/grokmirror/manifest.py +++ b/grokmirror/manifest.py @@ -37,6 +37,12 @@ def update_manifest(manifest, toplevel, fullpath, usenow): sys.exit(1) gitdir = '/' + os.path.relpath(fullpath, toplevel) + # Ignore it if it's an empty git repository + fp = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=True) + if not fp: + logger.info('%s has no heads, ignoring', gitdir) + return + if gitdir not in manifest: # We didn't normalize paths to be always with a leading '/', so # check the manifest for both and make sure we only save the path with a leading / @@ -49,12 +55,6 @@ def update_manifest(manifest, toplevel, fullpath, usenow): else: logger.info('Updating %s in the manifest', gitdir) - # Ignore it if it's an empty git repository - fp = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=True) - if not fp: - logger.info('%s has no heads, ignoring', gitdir) - return - description = None try: descfile = os.path.join(fullpath, 'description') @@ -75,7 +75,13 @@ def update_manifest(manifest, toplevel, fullpath, usenow): args = ['for-each-ref', '--sort=-committerdate', '--format=%(committerdate:iso-strict)', '--count=1'] ecode, out, err = grokmirror.run_git_command(fullpath, args) if len(out): - modified = datetime.datetime.fromisoformat(out) + try: + modified = datetime.datetime.fromisoformat(out) + except AttributeError: + # Python 3.6 doesn't have fromisoformat + # remove : from the TZ info + out = out[:-3] + out[-2:] + modified = datetime.datetime.strptime(out, '%Y-%m-%dT%H:%M:%S%z') if not modified: modified = datetime.datetime.now() @@ -208,6 +214,9 @@ def parse_args(): help='When running with arguments, wait if manifest is not ' 'there (can be useful when multiple writers are writing ' 'the manifest)') + op.add_option('-o', '--fetch-objstore', dest='fetchobst', + action='store_true', default=False, + help='Fetch updates into objstore repo (if used)') op.add_option('-v', '--verbose', dest='verbose', action='store_true', default=False, help='Be verbose and tell us what you are doing') @@ -226,8 +235,9 @@ def parse_args(): def grok_manifest(manifile, toplevel, args=None, logfile=None, usenow=False, check_export_ok=False, purge=False, remove=False, - pretty=False, ignore=None, wait=False, verbose=False): + pretty=False, ignore=None, wait=False, verbose=False, fetchobst=False): + startt = datetime.datetime.now() if args is None: args = list() if ignore is None: @@ -305,6 +315,7 @@ def grok_manifest(manifile, toplevel, args=None, logfile=None, usenow=False, symlinks = list() # noinspection PyTypeChecker run = em.counter(total=len(gitdirs), desc='Processing:', unit='repos', leave=False) + tofetch = set() for gitdir in gitdirs: run.update() # check to make sure this gitdir is ok to export @@ -324,16 +335,29 @@ def grok_manifest(manifile, toplevel, args=None, logfile=None, usenow=False, symlinks.append(gitdir) else: update_manifest(manifest, toplevel, gitdir, usenow) - - logger.info('Updated %s records in %0.2fs', len(gitdirs), run.elapsed) - run.close() - em.stop() + if fetchobst: + # Do it after we're done with manifest, to avoid keeping it locked + tofetch.add(gitdir) if len(symlinks): set_symlinks(manifest, toplevel, symlinks) grokmirror.write_manifest(manifile, manifest, pretty=pretty) grokmirror.manifest_unlock(manifile) + run.close() + em.stop() + + for gitdir in tofetch: + altrepo = grokmirror.get_altrepo(gitdir) + if altrepo and re.match(uuidre, altrepo, flags=re.I): + logger.info('Fetching objects into %s', os.path.basename(altrepo)) + grokmirror.fetch_objstore_repo(altrepo, gitdir) + + elapsed = datetime.datetime.now() - startt + if len(gitdirs) > 1: + logger.info('Updated %s records in %ds', len(gitdirs), elapsed.total_seconds()) + else: + logger.info('Done in %0.2fs', elapsed.total_seconds()) def command(): @@ -343,7 +367,8 @@ def command(): opts.manifile, opts.toplevel, args=args, logfile=opts.logfile, usenow=opts.usenow, check_export_ok=opts.check_export_ok, purge=opts.purge, remove=opts.remove, pretty=opts.pretty, - ignore=opts.ignore, wait=opts.wait, verbose=opts.verbose) + ignore=opts.ignore, wait=opts.wait, verbose=opts.verbose, + fetchobst=opts.fetchobst) if __name__ == '__main__': diff --git a/grokmirror/pull.py b/grokmirror/pull.py index 52cf913..7a40748 100755 --- a/grokmirror/pull.py +++ b/grokmirror/pull.py @@ -116,7 +116,9 @@ def queue_worker(config, gitdir, repoinfo, action, obstrepo, is_private): logger.debug('FP match, not pulling %s', gitdir) if success: - set_agefile(toplevel, gitdir, repoinfo.get('modified')) + modified = repoinfo.get('modified', None) + if modified is not None: + set_agefile(toplevel, gitdir, modified) if my_fp is not None: grokmirror.set_repo_fingerprint(toplevel, gitdir, fingerprint=my_fp) @@ -384,7 +386,49 @@ def find_next_best_actions(toplevel, todo, obstdir, privmasks, forkgroups, mappi obstrepo = os.path.join(obstdir, '%s.git' % forkgroup) if not os.path.isdir(obstrepo): - # No siblings matched an existing objstore repo, we are the first + # No siblings matched an existing objstore repo + # Do we have any existing siblings that were cloned without obstrepo? + # This would happen when an initial fork is created of an existing repo. + found = False + for s_fullpath in forkgroups[forkgroup]: + if os.path.isdir(s_fullpath): + is_private = False + s_gitdir = '/' + os.path.relpath(s_fullpath, toplevel) + for privmask in privmasks: + # Does this repo match privrepo + if fnmatch.fnmatch(s_gitdir, privmask): + is_private = True + break + if is_private: + # Can't use this one, as it's private + continue + # Make a new obstrepo and fetch it there + try: + # XXX: Need to deal with a situation if this repository is not known to us + # via manifests. + logger.debug('reusing existing %s as new obstrepo %s', s_gitdir, obstrepo) + obstrepo = grokmirror.setup_objstore_repo(obstdir, name=forkgroup) + # We want to prevent other siblings from being fetched until + # we have some objects available + pending_obstrepos.add(obstrepo) + logger.debug('locked obstrepo %s', obstrepo) + mapping[s_fullpath] = obstrepo + grokmirror.add_repo_to_objstore(obstrepo, s_fullpath) + grokmirror.set_altrepo(s_fullpath, obstrepo) + candidates.add((s_gitdir, 'objstore', obstrepo, False)) + found = True + except IOError: + # External process is keeping it locked + logger.debug('cannot reuse %s until %s is available', s_gitdir, obstrepo) + # mark all siblings as ignore + for sf in forkgroups[forkgroup]: + ignore.add(sf) + break + + if found: + continue + + # No existing repos we can reuse, so let's start a new objstore repo. # But wait, are we a private repo? obstrepo = grokmirror.setup_objstore_repo(obstdir, name=forkgroup) if is_private: @@ -403,7 +447,7 @@ def find_next_best_actions(toplevel, todo, obstdir, privmasks, forkgroups, mappi if not found: # clone as a non-sibling repo, then logger.debug('all siblings are private, so clone as individual repo') - candidates.add((c_gitdir, 'clone', None, True)) + candidates.add((c_gitdir, 'init', None, True)) continue else: # We'll add it when we come to it @@ -647,8 +691,8 @@ def pull_mirror(config, verbose=False, force=False, nomtime=False, r_forkgroups = dict() for gitdir in set(r_culled.keys()): e_cmp.update() - if r_culled[gitdir]['fingerprint'] is None: - logger.critical('Manifest files without fingeprints no longer supported.') + if not r_culled[gitdir].get('fingerprint', None): + logger.critical('Repos without fingerprint info (skipped): %s', gitdir) continue fullpath = os.path.join(toplevel, gitdir.lstrip('/')) @@ -797,12 +841,18 @@ def pull_mirror(config, verbose=False, force=False, nomtime=False, candidates = find_next_best_actions(toplevel, todo, obstdir, privmasks, forkgroups, mapping, maxactions=60-len(results)) for gitrepo, action, refrepo, is_private in candidates: - if action == 'init': + if action in ('init', 'objstore'): freshclones.add(gitrepo) - todo.remove((gitrepo, action)) + if action == 'objstore': + # Objstore actions aren't in the initial set, because we add them + # on the fly as we repurpose existing repos for objstore + repoinfo = l_manifest[gitrepo] + else: + repoinfo = r_culled[gitrepo] + todo.remove((gitrepo, action)) logger.info(' Queued: %s', gitrepo) e_que.update() - res = wpool.apply_async(queue_worker, (config, gitrepo, r_culled[gitrepo], + res = wpool.apply_async(queue_worker, (config, gitrepo, repoinfo, action, refrepo, is_private)) results.append(res) e_que.refresh() |