aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-07-23 16:32:52 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-07-23 17:19:05 -0400
commit 3047754fb1b719f7e2f997341ebd313981660956 (patch)
tree 0c3cd8d1ce3e1de5ae2fdb6b924d7ecc06084aeb
parent 6dc859cd4afeba8bf7ed61a35e952b8355128500 (diff)
download grokmirror-3047754fb1b719f7e2f997341ebd313981660956.tar.gz
More work to support public-inbox processing
- add another hook post_work_complete_hook, so that we can call public-inbox-extindex once per update cycle, as is recommended
- adjust grok-pi-indexer to handle three different hook invocations

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- grokmirror.conf 18
-rw-r--r-- grokmirror/pi_indexer.py 199
-rwxr-xr-x grokmirror/pull.py 70
3 files changed, 173 insertions, 114 deletions
diff --git a/grokmirror.conf b/grokmirror.conf
index 31895ad..a450e59 100644
--- a/grokmirror.conf
+++ b/grokmirror.conf
@@ -139,15 +139,21 @@ projectslist_symlinks = no
# argument. You can define multiple hooks if you separate them by
# newline+whitespace.
post_update_hook =
+#
# A hook to execute after all new repositories are done cloning.
-# It receives the path to the toplevel as the only parameter and
-# the list of freshly cloned repositories on stdin, newline-terminated.
-# This hook is useful when you want to make sure that certain jobs
-# only run when there were fresh clones and they are all done.
-# You can define multiple hooks if you separate them by
-# newline+whitespace.
+# It receives no arguments, only full paths to freshly cloned repositories
+# on stdin, newline-terminated. This hook is useful when you want to make
+# sure that certain jobs only run when there were fresh clones and they
+# have all completed cloning.
+# You can define multiple hooks if you separate them by newline+whitespace.
post_clone_complete_hook =
#
+# A hook to execute after the work queue is completely cleared, in case
+# you want to run some tasks only after all updates are completed. Does not
+# receive any arguments or stdin contents.
+# You can define multiple hooks if you separate them by newline+whitespace.
+post_work_complete_hook =
+#
# Should we purge repositories that are not present in the remote
# manifest? If set to "no" this can be overridden via the -p flag to
# grok-pull (useful if you have a very large collection of repos
diff --git a/grokmirror/pi_indexer.py b/grokmirror/pi_indexer.py
index c7991b0..cb26b1e 100644
--- a/grokmirror/pi_indexer.py
+++ b/grokmirror/pi_indexer.py
@@ -31,16 +31,20 @@ def get_pi_repos(inboxdir: str) -> list:
def index_pi_inbox(inboxdir: str, opts) -> bool:
- logger.info('Indexing inboxdir %s', inboxdir)
+ logger.info('pi-index %s', inboxdir)
success = True
# Check that msgmap.sqlite3 is there
msgmapdbf = os.path.join(inboxdir, 'msgmap.sqlite3')
if not os.path.exists(msgmapdbf):
- logger.critical('Inboxdir not initialized: %s', inboxdir)
+ logger.info('Inboxdir not initialized: %s', inboxdir)
return False
- piargs = ['public-inbox-index', inboxdir]
- env = {'PI_CONFIG': opts.piconfig}
+ piargs = ['public-inbox-index', '--no-update-extindex', inboxdir]
+
+ env = {
+ 'PI_CONFIG': opts.piconfig,
+ 'PATH': os.getenv('PATH', '/bin:/usr/bin:/usr/local/bin'),
+ }
try:
ec, out, err = grokmirror.run_shell_command(piargs, env=env)
if ec > 0:
@@ -59,7 +63,7 @@ def init_pi_inbox(inboxdir: str, opts) -> bool:
if opts.listid_priority:
boosts = list(reversed(opts.listid_priority.split(',')))
- logger.info('Initializing inboxdir %s', inboxdir)
+ logger.info('pi-init %s', inboxdir)
# Lock all member repos so they don't get updated in the process
pi_repos = get_pi_repos(inboxdir)
origins = None
@@ -143,9 +147,12 @@ def init_pi_inbox(inboxdir: str, opts) -> bool:
piargs += ['-c', f'{opt}={val}']
piargs += [inboxname, inboxdir, local_url]
piargs += addresses
- print(piargs)
+ logger.debug('piargs=%s', piargs)
- env = {'PI_CONFIG': opts.piconfig}
+ env = {
+ 'PI_CONFIG': opts.piconfig,
+ 'PATH': os.getenv('PATH', '/bin:/usr/bin:/usr/local/bin'),
+ }
try:
ec, out, err = grokmirror.run_shell_command(piargs, env=env)
if ec > 0:
@@ -156,7 +163,7 @@ def init_pi_inbox(inboxdir: str, opts) -> bool:
success = False
if success:
- with open(os.path.join(inboxdir, 'description', 'w')) as fh:
+ with open(os.path.join(inboxdir, 'description'), 'w') as fh:
fh.write(description)
# Unlock all members
@@ -177,38 +184,122 @@ def get_inboxdirs(repos: list) -> set:
return inboxdirs
+def process_inboxdirs(inboxdirs: list, opts, init: bool = False):
+ if not len(inboxdirs):
+ logger.info('Nothing to do')
+ sys.exit(0)
+
+ for inboxdir in inboxdirs:
+ # Check if msgmap.sqlite3 is there -- it can be a clone of a new epoch,
+ # so no initialization is necessary
+ msgmapdbf = os.path.join(inboxdir, 'msgmap.sqlite3')
+ if init and not os.path.exists(msgmapdbf):
+ # Initialize this public-inbox repo
+ if not init_pi_inbox(inboxdir, opts):
+ logger.critical('Could not init %s', inboxdir)
+ continue
+
+ if os.path.exists(msgmapdbf) and not index_pi_inbox(inboxdir, opts):
+ logger.critical('Unable to index %s', inboxdir)
+
+
+def cmd_init(opts):
+ inboxdirs = list()
+ if opts.inboxdir:
+ if opts.forceinit:
+ msgmapdbf = os.path.join(opts.inboxdir, 'msgmap.sqlite3')
+ # Delete msgmap and xap15 if present and reinitialize
+ if os.path.exists(msgmapdbf):
+ logger.critical('Reinitializing %s', opts.inboxdir)
+ os.unlink(msgmapdbf)
+ if os.path.exists(os.path.join(opts.inboxdir, 'xap15')):
+ shutil.rmtree(os.path.join(opts.inboxdir, 'xap15'))
+ inboxdirs.append(opts.inboxdir)
+ if not sys.stdin.isatty():
+ repos = list()
+ for line in sys.stdin.read().split('\n'):
+ if not line:
+ continue
+ repos.append(line)
+ inboxdirs += get_inboxdirs(repos)
+
+ process_inboxdirs(inboxdirs, opts, init=True)
+
+
+def cmd_update(opts):
+ if not opts.repo[0].endswith('.git'):
+ # Assume we're working with toplevel inboxdir
+ inboxdirs = opts.repo
+ else:
+ inboxdirs = get_inboxdirs(opts.repo)
+ process_inboxdirs(inboxdirs, opts)
+
+
+def cmd_extindex(opts):
+ env = {
+ 'PI_CONFIG': opts.piconfig,
+ 'PATH': os.getenv('PATH', '/bin:/usr/bin:/usr/local/bin'),
+ }
+ logger.info('Running extindex --all')
+ piargs = ['public-inbox-extindex', '--all']
+ try:
+ ec, out, err = grokmirror.run_shell_command(piargs, env=env)
+ if ec > 0:
+ logger.critical('Unable to run public-inbox-extindex: %s', err)
+ sys.exit(1)
+ except Exception as ex: # noqa
+ logger.critical('Unable to run public-inbox-extindex: %s', ex)
+ sys.exit(1)
+
+
def command():
import argparse
global logger
# noinspection PyTypeChecker
- op = argparse.ArgumentParser(prog='grok-pi-indexer',
+ ap = argparse.ArgumentParser(prog='grok-pi-indexer',
description='Properly initialize and update mirrored public-inbox repositories',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- op.add_argument('-v', '--verbose', action='store_true',
+ ap.add_argument('-v', '--verbose', action='store_true',
default=False,
help='Be verbose and tell us what you are doing')
- op.add_argument('-c', '--pi-config', dest='piconfig', required=True,
+ ap.add_argument('-c', '--pi-config', dest='piconfig', required=True,
help='Location of the public-inbox configuration file')
- op.add_argument('-l', '--logfile',
+ ap.add_argument('-l', '--logfile',
help='Log activity in this log file')
- op.add_argument('--local-hostname', dest='local_host',
- default='http://localhost/',
- help='URL of the local mirror toplevel')
- op.add_argument('--origin-hostname', dest='origin_host',
- default='https://lore.kernel.org/',
- help='URL of the origin toplevel serving config files')
- op.add_argument('--listid-priority', dest='listid_priority',
- default='*.linux.dev,*.kernel.org',
- help='List-Ids priority order (comma-separated, can use shell globbing)')
- op.add_argument('--indexlevel', default='full',
- help='Indexlevel to use with public-inbox-init (full, medium, basic)')
- op.add_argument('--force-init', dest='forceinit', action='store_true', default=False,
- help='Force (re-)initialization of the repo passed as argument')
- op.add_argument('repo', nargs='?',
- help='Full path to foo/git/N.git public-inbox repository')
-
- opts = op.parse_args()
+
+ sp = ap.add_subparsers(help='sub-command help', dest='subcmd')
+ sp_init = sp.add_parser('init', help='Run public-inbox-init+index on repositories passed via stdin')
+
+ sp_init.add_argument('--local-hostname', dest='local_host',
+ default='http://localhost/',
+ help='URL of the local mirror toplevel')
+ sp_init.add_argument('--origin-hostname', dest='origin_host',
+ default='https://lore.kernel.org/',
+ help='URL of the origin toplevel serving config files')
+ sp_init.add_argument('--listid-priority', dest='listid_priority',
+ default='*.linux.dev,*.kernel.org',
+ help='List-Ids priority order (comma-separated, can use shell globbing)')
+ sp_init.add_argument('--indexlevel', default='full',
+ help='Indexlevel to use with public-inbox-init (full, medium, basic)')
+ sp_init.add_argument('--force-reinit', dest='forceinit', action='store_true', default=False,
+ help='Force a full (re-)init of an inboxdir')
+ sp_init.add_argument('inboxdir', nargs='?',
+ help='Path to toplevel inboxdir (non-hook mode)')
+ sp_init.set_defaults(func=cmd_init)
+
+ sp_update = sp.add_parser('update', help='Run public-inbox-index on passed repository path')
+ sp_update.add_argument('repo', nargs=1,
+ help='Full path to foo/git/N.git public-inbox repository')
+ sp_update.set_defaults(func=cmd_update)
+
+ sp_extindex = sp.add_parser('extindex', help='Run extindex on all inboxes')
+ sp_extindex.set_defaults(func=cmd_extindex)
+
+ opts = ap.parse_args()
+ if 'func' not in opts:
+ ap.print_help()
+ sys.exit(1)
logfile = opts.logfile
if opts.verbose:
@@ -217,55 +308,7 @@ def command():
loglevel = logging.INFO
logger = grokmirror.init_logger('pull', logfile, loglevel, opts.verbose)
- if opts.repo:
- # If we have a positional argument, then this is a post-update hook. We only
- # run the indexer if the inboxdir has already been initialized
- mode = 'update'
- if not opts.repo.endswith('.git'):
- # Assume we're working with toplevel inboxdir
- inboxdirs = [opts.repo]
- else:
- inboxdirs = get_inboxdirs([opts.repo])
- elif not sys.stdin.isatty():
- # This looks like a post_clone_complete_hook invocation
- mode = 'clone'
- repos = list()
- for line in sys.stdin.read().split('\n'):
- if not line:
- continue
- repos.append(line)
- inboxdirs = get_inboxdirs(repos)
- else:
- logger.critical('Pass either the repo to update, or list of freshly cloned repos on stdin')
- sys.exit(1)
-
- if not len(inboxdirs):
- logger.info('No updated public-inbox repositories, exiting')
- sys.exit(0)
-
- for inboxdir in inboxdirs:
- # Check if msgmap.sqlite3 is there -- it can be a clone of a new epoch,
- # so no initialization is necessary
- msgmapdbf = os.path.join(inboxdir, 'msgmap.sqlite3')
- if not os.path.exists(msgmapdbf) and mode == 'clone':
- # Initialize this public-inbox repo
- if not init_pi_inbox(inboxdir, opts):
- logger.critical('Could not init %s', inboxdir)
- continue
- elif opts.forceinit and mode == 'update':
- # Delete msgmap and xap15 if present and reinitialize
- if os.path.exists(msgmapdbf):
- logger.critical('Reinitializing %s', inboxdir)
- os.unlink(msgmapdbf)
- if os.path.exists(os.path.join(inboxdir, 'xap15')):
- shutil.rmtree(os.path.join(inboxdir, 'xap15'))
- if not init_pi_inbox(inboxdir, opts):
- logger.critical('Could not init %s', inboxdir)
- continue
-
- logger.info('Indexing %s', inboxdir)
- if not index_pi_inbox(inboxdir, opts):
- logger.critical('Unable to index %s', inboxdir)
+ opts.func(opts)
if __name__ == '__main__':
diff --git a/grokmirror/pull.py b/grokmirror/pull.py
index cfc03e3..7472298 100755
--- a/grokmirror/pull.py
+++ b/grokmirror/pull.py
@@ -387,7 +387,7 @@ def pull_worker(config, q_pull, q_spa, q_done):
logger.info(' refetch: %s (try #%s)', gitdir, retries)
if success:
- run_post_update_hook(toplevel, gitdir, config['pull'].get('post_update_hook', ''))
+ run_post_update_hook(config, fullpath)
post_pull_fp = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=True)
repoinfo['fingerprint'] = post_pull_fp
altrepo = grokmirror.get_altrepo(fullpath)
@@ -568,56 +568,63 @@ def set_agefile(toplevel, gitdir, last_modified):
logger.debug('Wrote "%s" into %s', cgit_fmt, agefile)
-def run_post_clone_complete_hook(config, clones):
- toplevel = os.path.realpath(config['core'].get('toplevel'))
- stdin = '\n'.join(clones).encode() + b'\n'
- hookscripts = config['pull'].get('post_clone_complete_hook', '')
- for hookscript in hookscripts.split('\n'):
+def get_hookscripts(config, hookname):
+ hookscripts = list()
+ # And sinker!
+ hookline = config['pull'].get(hookname, '')
+ for hookscript in hookline.split('\n'):
hookscript = os.path.expanduser(hookscript.strip())
sp = shlex.shlex(hookscript, posix=True)
sp.whitespace_split = True
args = list(sp)
+ if not len(args):
+ continue
if not os.access(args[0], os.X_OK):
- logger.warning('post_update_hook %s is not executable', hookscript)
+ logger.warning('hook not executable: %s', hookscript)
continue
+ hookscripts.append(args)
+ return hookscripts
+
+
+def run_post_clone_complete_hook(config, clones):
+ stdin = '\n'.join(clones) + '\n'
+ hookscripts = get_hookscripts(config, 'post_clone_complete_hook')
+ for args in hookscripts:
logger.info(' inithook: %s', ' '.join(args))
logger.debug('Running: %s', ' '.join(args))
- args.append(toplevel)
- ecode, output, error = grokmirror.run_shell_command(args, stdin=stdin)
+ logger.debug('Stdin: ---start---')
+ logger.debug(stdin)
+ logger.debug('Stdin: ---end---')
+ ecode, output, error = grokmirror.run_shell_command(args, stdin=stdin.encode())
if error:
- # Put hook stderror into warning
logger.warning('Hook Stderr: %s', error)
if output:
- # Put hook stdout into info
logger.info('Hook Stdout: %s', output)
-def run_post_update_hook(toplevel, gitdir, hookscripts):
- if not len(hookscripts):
- return
+def run_post_work_complete_hook(config):
+ hookscripts = get_hookscripts(config, 'post_work_complete_hook')
+ for args in hookscripts:
+ logger.info(' workhook: %s', ' '.join(args))
+ logger.debug('Running: %s', ' '.join(args))
+ ecode, output, error = grokmirror.run_shell_command(args)
+ if error:
+ logger.warning('Hook Stderr: %s', error)
+ if output:
+ logger.info('Hook Stdout: %s', output)
- for hookscript in hookscripts.split('\n'):
- hookscript = os.path.expanduser(hookscript.strip())
- sp = shlex.shlex(hookscript, posix=True)
- sp.whitespace_split = True
- args = list(sp)
+def run_post_update_hook(config, fullpath):
+ hookscripts = get_hookscripts(config, 'post_update_hook')
+ for args in hookscripts:
logger.info(' hook: %s', ' '.join(args))
- if not os.access(args[0], os.X_OK):
- logger.warning('post_update_hook %s is not executable', hookscript)
- continue
-
- fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
args.append(fullpath)
logger.debug('Running: %s', ' '.join(args))
ecode, output, error = grokmirror.run_shell_command(args)
-
if error:
- # Put hook stderror into warning
- logger.warning('Hook Stderr (%s): %s', gitdir, error)
+ logger.warning('Hook Stderr (%s): %s', fullpath, error)
if output:
- # Put hook stdout into info
- logger.info('Hook Stdout (%s): %s', gitdir, output)
+ logger.info('Hook Stdout (%s): %s', fullpath, output)
def pull_repo(fullpath, remotename):
@@ -1176,6 +1183,7 @@ def pull_mirror(config, nomtime=False, forcepurge=False, runonce=False):
bad = 0
loopmark = None
post_clone_hook = config['pull'].get('post_clone_complete_hook')
+ post_work_hook = config['pull'].get('post_work_complete_hook')
with SignalHandler(config, sw, dws, pws, done):
while True:
for pw in pws:
@@ -1220,7 +1228,7 @@ def pull_mirror(config, nomtime=False, forcepurge=False, runonce=False):
pass
# Was it a clone, and are all other clones done?
if post_clone_hook and q_action == 'init':
- cloned.append(gitdir)
+ cloned.append(os.path.join(toplevel, gitdir.lstrip('/')))
more_clones = False
for qgd, qqa in actions:
if qqa == 'init':
@@ -1290,6 +1298,8 @@ def pull_mirror(config, nomtime=False, forcepurge=False, runonce=False):
if not len(pws):
if done:
update_manifest(config, done)
+ if post_work_hook:
+ run_post_work_complete_hook(config)
if runonce:
# Wait till spa is done
while True: