diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-07-23 16:32:52 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-07-23 17:19:05 -0400 |
commit | 3047754fb1b719f7e2f997341ebd313981660956 (patch) | |
tree | 0c3cd8d1ce3e1de5ae2fdb6b924d7ecc06084aeb | |
parent | 6dc859cd4afeba8bf7ed61a35e952b8355128500 (diff) | |
download | grokmirror-3047754fb1b719f7e2f997341ebd313981660956.tar.gz |
More work to support public-inbox processing
- add another hook post_work_complete_hook, so that we can call
public-inbox-extindex once per update cycle, as is recommended
- adjust grok-pi-indexer to handle three different hook invocations
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | grokmirror.conf | 18 |
-rw-r--r-- | grokmirror/pi_indexer.py | 199 |
-rwxr-xr-x | grokmirror/pull.py | 70 |
3 files changed, 173 insertions, 114 deletions
diff --git a/grokmirror.conf b/grokmirror.conf index 31895ad..a450e59 100644 --- a/grokmirror.conf +++ b/grokmirror.conf @@ -139,15 +139,21 @@ projectslist_symlinks = no # argument. You can define multiple hooks if you separate them by # newline+whitespace. post_update_hook = +# # A hook to execute after all new repositories are done cloning. -# It receives the path to the toplevel as the only parameter and -# the list of freshly cloned repositories on stdin, newline-terminated. -# This hook is useful when you want to make sure that certain jobs -# only run when there were fresh clones and they are all done. -# You can define multiple hooks if you separate them by -# newline+whitespace. +# It receives no arguments, only full paths to freshly cloned repositories +# on stdin, newline-terminated. This hook is useful when you want to make +# sure that certain jobs only run when there were fresh clones and they +# have all completed cloning. +# You can define multiple hooks if you separate them by newline+whitespace. post_clone_complete_hook = # +# A hook to execute after the work queue is completely cleared, in case +# you want to run some tasks only after all updates are completed. Does not +# receive any arguments or stdin contents. +# You can define multiple hooks if you separate them by newline+whitespace. +post_work_complete_hook = +# # Should we purge repositories that are not present in the remote # manifest? 
If set to "no" this can be overridden via the -p flag to # grok-pull (useful if you have a very large collection of repos diff --git a/grokmirror/pi_indexer.py b/grokmirror/pi_indexer.py index c7991b0..cb26b1e 100644 --- a/grokmirror/pi_indexer.py +++ b/grokmirror/pi_indexer.py @@ -31,16 +31,20 @@ def get_pi_repos(inboxdir: str) -> list: def index_pi_inbox(inboxdir: str, opts) -> bool: - logger.info('Indexing inboxdir %s', inboxdir) + logger.info('pi-index %s', inboxdir) success = True # Check that msgmap.sqlite3 is there msgmapdbf = os.path.join(inboxdir, 'msgmap.sqlite3') if not os.path.exists(msgmapdbf): - logger.critical('Inboxdir not initialized: %s', inboxdir) + logger.info('Inboxdir not initialized: %s', inboxdir) return False - piargs = ['public-inbox-index', inboxdir] - env = {'PI_CONFIG': opts.piconfig} + piargs = ['public-inbox-index', '--no-update-extindex', inboxdir] + + env = { + 'PI_CONFIG': opts.piconfig, + 'PATH': os.getenv('PATH', '/bin:/usr/bin:/usr/local/bin'), + } try: ec, out, err = grokmirror.run_shell_command(piargs, env=env) if ec > 0: @@ -59,7 +63,7 @@ def init_pi_inbox(inboxdir: str, opts) -> bool: if opts.listid_priority: boosts = list(reversed(opts.listid_priority.split(','))) - logger.info('Initializing inboxdir %s', inboxdir) + logger.info('pi-init %s', inboxdir) # Lock all member repos so they don't get updated in the process pi_repos = get_pi_repos(inboxdir) origins = None @@ -143,9 +147,12 @@ def init_pi_inbox(inboxdir: str, opts) -> bool: piargs += ['-c', f'{opt}={val}'] piargs += [inboxname, inboxdir, local_url] piargs += addresses - print(piargs) + logger.debug('piargs=%s', piargs) - env = {'PI_CONFIG': opts.piconfig} + env = { + 'PI_CONFIG': opts.piconfig, + 'PATH': os.getenv('PATH', '/bin:/usr/bin:/usr/local/bin'), + } try: ec, out, err = grokmirror.run_shell_command(piargs, env=env) if ec > 0: @@ -156,7 +163,7 @@ def init_pi_inbox(inboxdir: str, opts) -> bool: success = False if success: - with open(os.path.join(inboxdir, 
'description', 'w')) as fh: + with open(os.path.join(inboxdir, 'description'), 'w') as fh: fh.write(description) # Unlock all members @@ -177,38 +184,122 @@ def get_inboxdirs(repos: list) -> set: return inboxdirs +def process_inboxdirs(inboxdirs: list, opts, init: bool = False): + if not len(inboxdirs): + logger.info('Nothing to do') + sys.exit(0) + + for inboxdir in inboxdirs: + # Check if msgmap.sqlite3 is there -- it can be a clone of a new epoch, + # so no initialization is necessary + msgmapdbf = os.path.join(inboxdir, 'msgmap.sqlite3') + if init and not os.path.exists(msgmapdbf): + # Initialize this public-inbox repo + if not init_pi_inbox(inboxdir, opts): + logger.critical('Could not init %s', inboxdir) + continue + + if os.path.exists(msgmapdbf) and not index_pi_inbox(inboxdir, opts): + logger.critical('Unable to index %s', inboxdir) + + +def cmd_init(opts): + inboxdirs = list() + if opts.inboxdir: + if opts.forceinit: + msgmapdbf = os.path.join(opts.inboxdir, 'msgmap.sqlite3') + # Delete msgmap and xap15 if present and reinitialize + if os.path.exists(msgmapdbf): + logger.critical('Reinitializing %s', opts.inboxdir) + os.unlink(msgmapdbf) + if os.path.exists(os.path.join(opts.inboxdir, 'xap15')): + shutil.rmtree(os.path.join(opts.inboxdir, 'xap15')) + inboxdirs.append(opts.inboxdir) + if not sys.stdin.isatty(): + repos = list() + for line in sys.stdin.read().split('\n'): + if not line: + continue + repos.append(line) + inboxdirs += get_inboxdirs(repos) + + process_inboxdirs(inboxdirs, opts, init=True) + + +def cmd_update(opts): + if not opts.repo[0].endswith('.git'): + # Assume we're working with toplevel inboxdir + inboxdirs = opts.repo + else: + inboxdirs = get_inboxdirs(opts.repo) + process_inboxdirs(inboxdirs, opts) + + +def cmd_extindex(opts): + env = { + 'PI_CONFIG': opts.piconfig, + 'PATH': os.getenv('PATH', '/bin:/usr/bin:/usr/local/bin'), + } + logger.info('Running extindex --all') + piargs = ['public-inbox-extindex', '--all'] + try: + ec, out, 
err = grokmirror.run_shell_command(piargs, env=env) + if ec > 0: + logger.critical('Unable to run public-inbox-extindex: %s', err) + sys.exit(1) + except Exception as ex: # noqa + logger.critical('Unable to run public-inbox-extindex: %s', ex) + sys.exit(1) + + def command(): import argparse global logger # noinspection PyTypeChecker - op = argparse.ArgumentParser(prog='grok-pi-indexer', + ap = argparse.ArgumentParser(prog='grok-pi-indexer', description='Properly initialize and update mirrored public-inbox repositories', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - op.add_argument('-v', '--verbose', action='store_true', + ap.add_argument('-v', '--verbose', action='store_true', default=False, help='Be verbose and tell us what you are doing') - op.add_argument('-c', '--pi-config', dest='piconfig', required=True, + ap.add_argument('-c', '--pi-config', dest='piconfig', required=True, help='Location of the public-inbox configuration file') - op.add_argument('-l', '--logfile', + ap.add_argument('-l', '--logfile', help='Log activity in this log file') - op.add_argument('--local-hostname', dest='local_host', - default='http://localhost/', - help='URL of the local mirror toplevel') - op.add_argument('--origin-hostname', dest='origin_host', - default='https://lore.kernel.org/', - help='URL of the origin toplevel serving config files') - op.add_argument('--listid-priority', dest='listid_priority', - default='*.linux.dev,*.kernel.org', - help='List-Ids priority order (comma-separated, can use shell globbing)') - op.add_argument('--indexlevel', default='full', - help='Indexlevel to use with public-inbox-init (full, medium, basic)') - op.add_argument('--force-init', dest='forceinit', action='store_true', default=False, - help='Force (re-)initialization of the repo passed as argument') - op.add_argument('repo', nargs='?', - help='Full path to foo/git/N.git public-inbox repository') - - opts = op.parse_args() + + sp = ap.add_subparsers(help='sub-command help', 
dest='subcmd') + sp_init = sp.add_parser('init', help='Run public-inbox-init+index on repositories passed via stdin') + + sp_init.add_argument('--local-hostname', dest='local_host', + default='http://localhost/', + help='URL of the local mirror toplevel') + sp_init.add_argument('--origin-hostname', dest='origin_host', + default='https://lore.kernel.org/', + help='URL of the origin toplevel serving config files') + sp_init.add_argument('--listid-priority', dest='listid_priority', + default='*.linux.dev,*.kernel.org', + help='List-Ids priority order (comma-separated, can use shell globbing)') + sp_init.add_argument('--indexlevel', default='full', + help='Indexlevel to use with public-inbox-init (full, medium, basic)') + sp_init.add_argument('--force-reinit', dest='forceinit', action='store_true', default=False, + help='Force a full (re-)init of an inboxdir') + sp_init.add_argument('inboxdir', nargs='?', + help='Path to toplevel inboxdir (non-hook mode)') + sp_init.set_defaults(func=cmd_init) + + sp_update = sp.add_parser('update', help='Run public-inbox-index on passed repository path') + sp_update.add_argument('repo', nargs=1, + help='Full path to foo/git/N.git public-inbox repository') + sp_update.set_defaults(func=cmd_update) + + sp_extindex = sp.add_parser('extindex', help='Run extindex on all inboxes') + sp_extindex.set_defaults(func=cmd_extindex) + + opts = ap.parse_args() + if 'func' not in opts: + ap.print_help() + sys.exit(1) logfile = opts.logfile if opts.verbose: @@ -217,55 +308,7 @@ def command(): loglevel = logging.INFO logger = grokmirror.init_logger('pull', logfile, loglevel, opts.verbose) - if opts.repo: - # If we have a positional argument, then this is a post-update hook. 
We only - # run the indexer if the inboxdir has already been initialized - mode = 'update' - if not opts.repo.endswith('.git'): - # Assume we're working with toplevel inboxdir - inboxdirs = [opts.repo] - else: - inboxdirs = get_inboxdirs([opts.repo]) - elif not sys.stdin.isatty(): - # This looks like a post_clone_complete_hook invocation - mode = 'clone' - repos = list() - for line in sys.stdin.read().split('\n'): - if not line: - continue - repos.append(line) - inboxdirs = get_inboxdirs(repos) - else: - logger.critical('Pass either the repo to update, or list of freshly cloned repos on stdin') - sys.exit(1) - - if not len(inboxdirs): - logger.info('No updated public-inbox repositories, exiting') - sys.exit(0) - - for inboxdir in inboxdirs: - # Check if msgmap.sqlite3 is there -- it can be a clone of a new epoch, - # so no initialization is necessary - msgmapdbf = os.path.join(inboxdir, 'msgmap.sqlite3') - if not os.path.exists(msgmapdbf) and mode == 'clone': - # Initialize this public-inbox repo - if not init_pi_inbox(inboxdir, opts): - logger.critical('Could not init %s', inboxdir) - continue - elif opts.forceinit and mode == 'update': - # Delete msgmap and xap15 if present and reinitialize - if os.path.exists(msgmapdbf): - logger.critical('Reinitializing %s', inboxdir) - os.unlink(msgmapdbf) - if os.path.exists(os.path.join(inboxdir, 'xap15')): - shutil.rmtree(os.path.join(inboxdir, 'xap15')) - if not init_pi_inbox(inboxdir, opts): - logger.critical('Could not init %s', inboxdir) - continue - - logger.info('Indexing %s', inboxdir) - if not index_pi_inbox(inboxdir, opts): - logger.critical('Unable to index %s', inboxdir) + opts.func(opts) if __name__ == '__main__': diff --git a/grokmirror/pull.py b/grokmirror/pull.py index cfc03e3..7472298 100755 --- a/grokmirror/pull.py +++ b/grokmirror/pull.py @@ -387,7 +387,7 @@ def pull_worker(config, q_pull, q_spa, q_done): logger.info(' refetch: %s (try #%s)', gitdir, retries) if success: - run_post_update_hook(toplevel, 
gitdir, config['pull'].get('post_update_hook', '')) + run_post_update_hook(config, fullpath) post_pull_fp = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=True) repoinfo['fingerprint'] = post_pull_fp altrepo = grokmirror.get_altrepo(fullpath) @@ -568,56 +568,63 @@ def set_agefile(toplevel, gitdir, last_modified): logger.debug('Wrote "%s" into %s', cgit_fmt, agefile) -def run_post_clone_complete_hook(config, clones): - toplevel = os.path.realpath(config['core'].get('toplevel')) - stdin = '\n'.join(clones).encode() + b'\n' - hookscripts = config['pull'].get('post_clone_complete_hook', '') - for hookscript in hookscripts.split('\n'): +def get_hookscripts(config, hookname): + hookscripts = list() + # And sinker! + hookline = config['pull'].get(hookname, '') + for hookscript in hookline.split('\n'): hookscript = os.path.expanduser(hookscript.strip()) sp = shlex.shlex(hookscript, posix=True) sp.whitespace_split = True args = list(sp) + if not len(args): + continue if not os.access(args[0], os.X_OK): - logger.warning('post_update_hook %s is not executable', hookscript) + logger.warning('hook not executable: %s', hookscript) continue + hookscripts.append(args) + return hookscripts + + +def run_post_clone_complete_hook(config, clones): + stdin = '\n'.join(clones) + '\n' + hookscripts = get_hookscripts(config, 'post_clone_complete_hook') + for args in hookscripts: logger.info(' inithook: %s', ' '.join(args)) logger.debug('Running: %s', ' '.join(args)) - args.append(toplevel) - ecode, output, error = grokmirror.run_shell_command(args, stdin=stdin) + logger.debug('Stdin: ---start---') + logger.debug(stdin) + logger.debug('Stdin: ---end---') + ecode, output, error = grokmirror.run_shell_command(args, stdin=stdin.encode()) if error: - # Put hook stderror into warning logger.warning('Hook Stderr: %s', error) if output: - # Put hook stdout into info logger.info('Hook Stdout: %s', output) -def run_post_update_hook(toplevel, gitdir, hookscripts): - if not len(hookscripts): 
- return +def run_post_work_complete_hook(config): + hookscripts = get_hookscripts(config, 'post_work_complete_hook') + for args in hookscripts: + logger.info(' workhook: %s', ' '.join(args)) + logger.debug('Running: %s', ' '.join(args)) + ecode, output, error = grokmirror.run_shell_command(args) + if error: + logger.warning('Hook Stderr: %s', error) + if output: + logger.info('Hook Stdout: %s', output) - for hookscript in hookscripts.split('\n'): - hookscript = os.path.expanduser(hookscript.strip()) - sp = shlex.shlex(hookscript, posix=True) - sp.whitespace_split = True - args = list(sp) +def run_post_update_hook(config, fullpath): + hookscripts = get_hookscripts(config, 'post_update_hook') + for args in hookscripts: logger.info(' hook: %s', ' '.join(args)) - if not os.access(args[0], os.X_OK): - logger.warning('post_update_hook %s is not executable', hookscript) - continue - - fullpath = os.path.join(toplevel, gitdir.lstrip('/')) args.append(fullpath) logger.debug('Running: %s', ' '.join(args)) ecode, output, error = grokmirror.run_shell_command(args) - if error: - # Put hook stderror into warning - logger.warning('Hook Stderr (%s): %s', gitdir, error) + logger.warning('Hook Stderr (%s): %s', fullpath, error) if output: - # Put hook stdout into info - logger.info('Hook Stdout (%s): %s', gitdir, output) + logger.info('Hook Stdout (%s): %s', fullpath, output) def pull_repo(fullpath, remotename): @@ -1176,6 +1183,7 @@ def pull_mirror(config, nomtime=False, forcepurge=False, runonce=False): bad = 0 loopmark = None post_clone_hook = config['pull'].get('post_clone_complete_hook') + post_work_hook = config['pull'].get('post_work_complete_hook') with SignalHandler(config, sw, dws, pws, done): while True: for pw in pws: @@ -1220,7 +1228,7 @@ def pull_mirror(config, nomtime=False, forcepurge=False, runonce=False): pass # Was it a clone, and are all other clones done? 
if post_clone_hook and q_action == 'init': - cloned.append(gitdir) + cloned.append(os.path.join(toplevel, gitdir.lstrip('/'))) more_clones = False for qgd, qqa in actions: if qqa == 'init': @@ -1290,6 +1298,8 @@ def pull_mirror(config, nomtime=False, forcepurge=False, runonce=False): if not len(pws): if done: update_manifest(config, done) + if post_work_hook: + run_post_work_complete_hook(config) if runonce: # Wait till spa is done while True: |