diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-10-07 12:18:00 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-10-07 12:18:00 -0400 |
commit | 1f1a8585a53d28983e170d86de4abf1670f0d7dc (patch) | |
tree | 6398d6a5d167f58b66061a91623377dff82d129b | |
parent | 0d3e02a03bcad209abc97f038b5e61a0fc2b3fde (diff) | |
download | grokmirror-1f1a8585a53d28983e170d86de4abf1670f0d7dc.tar.gz |
Add some more features to piper
- Make it a fully supported command called grok-pi-piper
- Add support for reshallowing repos after each piper run,
saving tons of space
- Sprinkle expanduser in a few places to better support user-initiated
operations
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | CHANGELOG.rst | 2 | ||||
-rw-r--r-- | contrib/python-grokmirror.spec | 3 | ||||
-rw-r--r-- | grokmirror/__init__.py | 11 | ||||
-rwxr-xr-x[-rw-r--r--] | grokmirror/pi-piper.py (renamed from contrib/pi-piper.py) | 82 | ||||
-rwxr-xr-x | grokmirror/pull.py | 5 | ||||
-rw-r--r-- | man/grok-pi-piper.1 | 129 | ||||
-rw-r--r-- | man/grok-pi-piper.1.rst | 79 | ||||
-rw-r--r-- | pi-piper.conf (renamed from contrib/pi-piper.conf) | 14 | ||||
-rw-r--r-- | setup.py | 1 |
9 files changed, 290 insertions, 36 deletions
diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5b9c69f..c5e1eee 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,7 @@ v2.0.2 (2020-10-06) - Provide pi-piper utility for piping new messages from public-inbox repositories. It can be specified as post_update_hook: - post_update_hook = /usr/bin/pi-piper -c ~/pi-piper.conf + post_update_hook = /usr/bin/grok-pi-piper -c ~/.config/pi-piper.conf - Add -r option to grok-manifest to ignore specific refs when calculating repository fingerprint. This is mostly useful for mirroring from gerrit. diff --git a/contrib/python-grokmirror.spec b/contrib/python-grokmirror.spec index 3c96836..9d250bb 100644 --- a/contrib/python-grokmirror.spec +++ b/contrib/python-grokmirror.spec @@ -59,7 +59,6 @@ Obsoletes: python-%{srcname} < 2, python2-%{srcname} < 2 %{__install} -m 0644 contrib/*.timer %{buildroot}/%{_unitdir}/ %{__install} -m 0644 contrib/logrotate %{buildroot}/%{_sysconfdir}/logrotate.d/grokmirror %{__install} -m 0644 grokmirror.conf %{buildroot}/%{_sysconfdir}/%{srcname}/grokmirror.conf.example -%{__install} -m 0755 contrib/pi-piper.py %{buildroot}/%{_bindir}/pi-piper echo "d /run/%{srcname} 0755 %{username} %{groupname}" > %{buildroot}/%{_tmpfilesdir}/%{srcname}.conf @@ -72,7 +71,7 @@ exit 0 %files -n python3-%{srcname} %license LICENSE.txt -%doc README.rst grokmirror.conf contrib/pi-piper.conf +%doc README.rst grokmirror.conf pi-piper.conf %dir %attr(0750, %{username}, %{groupname}) %{userhome} %dir %attr(0755, %{username}, %{groupname}) %{_localstatedir}/log/%{srcname}/ %dir %attr(0755, %{username}, %{groupname}) /run/%{srcname}/ diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py index f888593..b37408f 100644 --- a/grokmirror/__init__.py +++ b/grokmirror/__init__.py @@ -927,16 +927,23 @@ def load_config_file(cfgfile): sys.stderr.write(' Perhaps this is a grokmirror-1.x config file?\n') sys.exit(1) - toplevel = os.path.realpath(config['core'].get('toplevel')) + toplevel = 
os.path.realpath(os.path.expanduser(config['core'].get('toplevel'))) if not os.access(toplevel, os.W_OK): logger.critical('Toplevel %s does not exist or is not writable', toplevel) sys.exit(1) + # Just in case we did expanduser + config['core']['toplevel'] = toplevel obstdir = config['core'].get('objstore', None) if obstdir is None: obstdir = os.path.join(toplevel, 'objstore') config['core']['objstore'] = obstdir + # Handle some other defaults + manifile = config['core'].get('manifest') + if not manifile: + config['core']['manifest'] = os.path.join(toplevel, 'manifest.js.gz') + fstat = os.stat(cfgfile) # stick last config file modification date into the config object, # so we can catch config file updates @@ -1004,7 +1011,7 @@ def init_logger(subcommand, logfile, loglevel, verbose): logger.setLevel(logging.DEBUG) if logfile: - ch = logging.handlers.WatchedFileHandler(logfile) + ch = logging.handlers.WatchedFileHandler(os.path.expanduser(logfile)) formatter = logging.Formatter(subcommand + '[%(process)d] %(asctime)s - %(levelname)s - %(message)s') ch.setFormatter(formatter) ch.setLevel(loglevel) diff --git a/contrib/pi-piper.py b/grokmirror/pi-piper.py index 59ab27f..5d5bbfb 100644..100755 --- a/contrib/pi-piper.py +++ b/grokmirror/pi-piper.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # -# This is a ready-made post_update_hook for mirroring public-inbox repositories. -# updated via grokmirror to arbitrary commands. +# This is a ready-made post_update_hook script for piping messages from +# mirrored public-inbox repositories to arbitrary commands (e.g. procmail). # __author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>' @@ -36,20 +36,9 @@ def git_get_new_revs(fullpath: str, pipelast: Optional[int] = None) -> list: if pipelast: rev_range = '-n %d' % pipelast else: - try: - with open(statf, 'r') as fh: - latest = fh.read().strip() - rev_range = f'{latest}..' 
- except FileNotFoundError: - logger.info('Initial run for %s', fullpath) - args = ['rev-list', '-n', '1', 'master'] - ecode, out, err = grokmirror.run_git_command(fullpath, args) - if ecode > 0: - raise KeyError('Could not list revs in %s' % fullpath) - # Just write latest into the tracking file and return nothing - with open(statf, 'w') as fh: - fh.write(out.strip()) - return list() + with open(statf, 'r') as fh: + latest = fh.read().strip() + rev_range = f'{latest}..' args = ['rev-list', '--pretty=oneline', '--reverse', rev_range, 'master'] ecode, out, err = grokmirror.run_git_command(fullpath, args) @@ -66,7 +55,33 @@ def git_get_new_revs(fullpath: str, pipelast: Optional[int] = None) -> list: return newrevs -def run_pi_repo(repo, pipedef, dryrun=False, pipelast=None): +def reshallow(repo: str, commit_id: str) -> int: + with open(os.path.join(repo, 'shallow'), 'w') as fh: + fh.write(commit_id) + fh.write('\n') + logger.info(' prune: %s ', repo) + ecode, out, err = grokmirror.run_git_command(repo, ['gc', '--prune=now']) + return ecode + + +def init_piper_tracking(repo: str, shallow: bool) -> bool: + logger.info('Initial setup for %s', repo) + args = ['rev-list', '-n', '1', 'master'] + ecode, out, err = grokmirror.run_git_command(repo, args) + if ecode > 0 or not out: + logger.info('Could not list revs in %s', repo) + return False + # Just write latest into the tracking file and return + latest = out.strip() + statf = os.path.join(repo, 'pi-piper.latest') + with open(statf, 'w') as fh: + fh.write(latest) + if shallow: + reshallow(repo, latest) + + +def run_pi_repo(repo: str, pipedef: str, dryrun: bool = False, shallow: bool = False, + pipelast: Optional[int] = None) -> None: logger.info('Checking %s', repo) sp = shlex.shlex(pipedef, posix=True) sp.whitespace_split = True @@ -76,6 +91,14 @@ def run_pi_repo(repo, pipedef, dryrun=False, pipelast=None): sys.exit(1) statf = os.path.join(repo, 'pi-piper.latest') + if not os.path.exists(statf): + if dryrun: + 
logger.info('Would have set up piper for %s [DRYRUN]', repo) + return + if not init_piper_tracking(repo, shallow): + logger.critical('Unable to set up piper for %s', repo) + return + try: revlist = git_get_new_revs(repo, pipelast=pipelast) except KeyError: @@ -88,8 +111,12 @@ def run_pi_repo(repo, pipedef, dryrun=False, pipelast=None): # then going through history to find the new commit-id of that # message. Unless, of course, that's the exact message that got # deleted in the first place. :/ + # This also makes it hard with shallow repos, since we'd have + # to unshallow them first in order to find that message. logger.critical('Assuming the repository got rebased, dropping all history.') os.unlink(statf) + if not dryrun: + init_piper_tracking(repo, shallow) revlist = git_get_new_revs(repo) if not revlist: @@ -119,18 +146,20 @@ def run_pi_repo(repo, pipedef, dryrun=False, pipelast=None): with open(statf, 'w') as fh: fh.write(latest_good) logger.info('Wrote %s', statf) + if ecode == 0 and shallow: + reshallow(repo, latest_good) sys.exit(ecode) -def main(): +def command(): import argparse from configparser import ConfigParser, ExtendedInterpolation global logger # noinspection PyTypeChecker - op = argparse.ArgumentParser(prog='pi-piper', + op = argparse.ArgumentParser(prog='grok-pi-piper', description='Pipe new messages from public-inbox repositories to arbitrary commands', formatter_class=argparse.ArgumentDefaultsHelpFormatter) op.add_argument('-v', '--verbose', action='store_true', @@ -149,11 +178,12 @@ def main(): opts = op.parse_args() - if not os.path.exists(opts.config): - sys.stderr.write('ERORR: File does not exist: %s\n' % opts.config) + cfgfile = os.path.expanduser(opts.config) + if not cfgfile: + sys.stderr.write('ERORR: File does not exist: %s\n' % cfgfile) sys.exit(1) config = ConfigParser(interpolation=ExtendedInterpolation()) - config.read(os.path.expanduser(opts.config)) + config.read(os.path.expanduser(cfgfile)) # Find out the section that we want 
from the config file section = 'DEFAULT' @@ -166,16 +196,18 @@ def main(): # Quick exit sys.exit(0) - logfile = os.path.expanduser(config[section].get('logfile')) + logfile = config[section].get('log') if config[section].get('loglevel') == 'debug': loglevel = logging.DEBUG else: loglevel = logging.INFO + shallow = config[section].getboolean('shallow', False) # noqa + logger = grokmirror.init_logger('pull', logfile, loglevel, opts.verbose) - run_pi_repo(opts.repo, pipe, dryrun=opts.dryrun, pipelast=opts.pipelast) + run_pi_repo(opts.repo, pipe, dryrun=opts.dryrun, shallow=shallow, pipelast=opts.pipelast) if __name__ == '__main__': - main() + command() diff --git a/grokmirror/pull.py b/grokmirror/pull.py index ac7672f..1b2b7a0 100755 --- a/grokmirror/pull.py +++ b/grokmirror/pull.py @@ -511,12 +511,12 @@ def run_post_update_hook(toplevel, gitdir, hookscripts): return for hookscript in hookscripts.split('\n'): - hookscript = hookscript.strip() + hookscript = os.path.expanduser(hookscript.strip()) sp = shlex.shlex(hookscript, posix=True) sp.whitespace_split = True args = list(sp) - logger.info(' hook: %s', args[0]) + logger.info(' hook: %s', ' '.join(args)) if not os.access(args[0], os.X_OK): logger.warning('post_update_hook %s is not executable', hookscript) continue @@ -1063,7 +1063,6 @@ def pull_mirror(config, nomtime=False, forcepurge=False, runonce=False): pull_threads = config['pull'].getint('pull_threads', 0) if pull_threads < 1: # take half of available CPUs by default - logger.info('pull_threads is not set, consider setting it') pull_threads = int(mp.cpu_count() / 2) busy = set() diff --git a/man/grok-pi-piper.1 b/man/grok-pi-piper.1 new file mode 100644 index 0000000..d1f3da6 --- /dev/null +++ b/man/grok-pi-piper.1 @@ -0,0 +1,129 @@ +.\" Man page generated from reStructuredText. +. +.TH GROK-PI-PIPER 1 "2020-10-07" "2.0.2" "" +.SH NAME +GROK-PI-PIPER \- Hook script for piping new messages from public-inbox repos +. +.nr rst2man-indent-level 0 +. 
+.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.INDENT 0.0 +.INDENT 3.5 +grok\-pi\-piper [\-h] [\-v] [\-d] \-c CONFIG [\-l PIPELAST] [\-\-version] repo +.UNINDENT +.UNINDENT +.SH DESCRIPTION +.sp +This is a ready\-made hook script that can be called from +pull.post_update_hook when mirroring public\-inbox repositories. It will +pipe all newly received messages to arbitrary commands defined in the +config file. The simplest configuration for lore.kernel.org is: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +~/.config/pi\-piper.conf +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +[DEFAULT] +pipe = /usr/bin/procmail +shallow = yes + +~/.procmailrc +\-\-\-\-\-\-\-\-\-\-\-\-\- +DEFAULT=$HOME/Maildir/ + +~/.config/lore.conf +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +[core] +toplevel = ~/.local/share/grokmirror/lore +log = ${toplevel}/grokmirror.log + +[remote] +site = https://lore.kernel.org +manifest = https://lore.kernel.org/manifest.js.gz + +[pull] +post_update_hook = ~/.local/bin/grok\-pi\-piper \-c ~/.config/pi\-piper.conf +include = /list\-you\-want/* + /another\-list/* +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +It assumes that grokmirror was installed from pip. If you installed it +via some other means, please check the path for the grok\-pi\-piper +script. +.sp +Note, that initial clone may take a long time, even if you set +shallow=yes. +.sp +See pi\-piper.conf for other config options. 
+.SH OPTIONS +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.TP +.B \-h\fP,\fB \-\-help +show this help message and exit +.TP +.B \-v\fP,\fB \-\-verbose +Be verbose and tell us what you are doing (default: False) +.TP +.B \-d\fP,\fB \-\-dry\-run +Do a dry\-run and just show what would be done (default: False) +.TP +.BI \-c \ CONFIG\fP,\fB \ \-\-config \ CONFIG +Location of the configuration file (default: None) +.TP +.BI \-l \ PIPELAST\fP,\fB \ \-\-pipe\-last \ PIPELAST +Force pipe last NN messages in the list, regardless of tracking (default: None) +.TP +.B \-\-version +show program\(aqs version number and exit +.UNINDENT +.UNINDENT +.UNINDENT +.SH SEE ALSO +.INDENT 0.0 +.IP \(bu 2 +grok\-pull(1) +.IP \(bu 2 +git(1) +.UNINDENT +.SH SUPPORT +.sp +Email \fI\%tools@linux.kernel.org\fP\&. +.SH AUTHOR +mricon@kernel.org + +License: GPLv3+ +.SH COPYRIGHT +The Linux Foundation and contributors +.\" Generated by docutils manpage writer. +. diff --git a/man/grok-pi-piper.1.rst b/man/grok-pi-piper.1.rst new file mode 100644 index 0000000..18d16bb --- /dev/null +++ b/man/grok-pi-piper.1.rst @@ -0,0 +1,79 @@ +GROK-PI-PIPER +============= +----------------------------------------------------------- +Hook script for piping new messages from public-inbox repos +----------------------------------------------------------- + +:Author: mricon@kernel.org +:Date: 2020-10-07 +:Copyright: The Linux Foundation and contributors +:License: GPLv3+ +:Version: 2.0.2 +:Manual section: 1 + +SYNOPSIS +-------- + grok-pi-piper [-h] [-v] [-d] -c CONFIG [-l PIPELAST] [--version] repo + +DESCRIPTION +----------- +This is a ready-made hook script that can be called from +pull.post_update_hook when mirroring public-inbox repositories. It will +pipe all newly received messages to arbitrary commands defined in the +config file. 
The simplest configuration for lore.kernel.org is:: + + ~/.config/pi-piper.conf + ----------------------- + [DEFAULT] + pipe = /usr/bin/procmail + shallow = yes + + ~/.procmailrc + ------------- + DEFAULT=$HOME/Maildir/ + + ~/.config/lore.conf + ------------------- + [core] + toplevel = ~/.local/share/grokmirror/lore + log = ${toplevel}/grokmirror.log + + [remote] + site = https://lore.kernel.org + manifest = https://lore.kernel.org/manifest.js.gz + + [pull] + post_update_hook = ~/.local/bin/grok-pi-piper -c ~/.config/pi-piper.conf + include = /list-you-want/* + /another-list/* + +It assumes that grokmirror was installed from pip. If you installed it +via some other means, please check the path for the grok-pi-piper +script. + +Note, that initial clone may take a long time, even if you set +shallow=yes. + +See pi-piper.conf for other config options. + + +OPTIONS +------- + -h, --help show this help message and exit + -v, --verbose Be verbose and tell us what you are doing (default: False) + -d, --dry-run Do a dry-run and just show what would be done (default: False) + -c CONFIG, --config CONFIG + Location of the configuration file (default: None) + -l PIPELAST, --pipe-last PIPELAST + Force pipe last NN messages in the list, regardless of tracking (default: None) + --version show program's version number and exit + + +SEE ALSO +-------- +* grok-pull(1) +* git(1) + +SUPPORT +------- +Email tools@linux.kernel.org. diff --git a/contrib/pi-piper.conf b/pi-piper.conf index 023fd1a..9d1a4d0 100644 --- a/contrib/pi-piper.conf +++ b/pi-piper.conf @@ -5,10 +5,18 @@ # DEFAULT=$HOME/Maildir/ # You can now read your mail with "mutt -f ~/Maildir/" pipe = /usr/bin/procmail -# You can use ~/ for paths in your home dir -logfile = ~/pi-piper.log +# Once you've successfully piped the messages, you generally +# don't need them any more. 
If you set shallow = yes, then +# the repository will be configured as "shallow" and all successfully +# processed messages will be pruned from the repo. +# This will greatly reduce disk space usage, especially on large archives. +# You can always get any number of them back, e.g. by running: +# git fetch _grokmirror master --deepen 100 +shallow = yes +# You can use ~/ for paths in your home dir, or omit for no log +#log = ~/pi-piper.log # Can be "info" or "debug". Note, that debug will have message bodies as well. -loglevel = info +#loglevel = info # Overrides for any defaults. You may not need any if all you want is to pipe all mirrored # public-inboxes to procmail. @@ -64,6 +64,7 @@ setup( "grok-fsck=grokmirror.fsck:command", "grok-manifest=grokmirror.manifest:command", "grok-bundle=grokmirror.bundle:command", + "grok-pi-piper=grokmirror.pi-piper:command", ] } ) |