aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-10-07 12:18:00 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-10-07 12:18:00 -0400
commit 1f1a8585a53d28983e170d86de4abf1670f0d7dc (patch)
tree 6398d6a5d167f58b66061a91623377dff82d129b
parent 0d3e02a03bcad209abc97f038b5e61a0fc2b3fde (diff)
download grokmirror-1f1a8585a53d28983e170d86de4abf1670f0d7dc.tar.gz
Add some more features to piper
- Make it a fully supported command called grok-pi-piper
- Add support for reshallowing repos after each piper run, saving tons of space
- Sprinkle expanduser in a few places to better support user-initiated operations

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- CHANGELOG.rst 2
-rw-r--r-- contrib/python-grokmirror.spec 3
-rw-r--r-- grokmirror/__init__.py 11
-rwxr-xr-x [-rw-r--r--] grokmirror/pi-piper.py (renamed from contrib/pi-piper.py) 82
-rwxr-xr-x grokmirror/pull.py 5
-rw-r--r-- man/grok-pi-piper.1 129
-rw-r--r-- man/grok-pi-piper.1.rst 79
-rw-r--r-- pi-piper.conf (renamed from contrib/pi-piper.conf) 14
-rw-r--r-- setup.py 1
9 files changed, 290 insertions, 36 deletions
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 5b9c69f..c5e1eee 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,7 +1,7 @@
v2.0.2 (2020-10-06)
- Provide pi-piper utility for piping new messages from public-inbox
repositories. It can be specified as post_update_hook:
- post_update_hook = /usr/bin/pi-piper -c ~/pi-piper.conf
+ post_update_hook = /usr/bin/grok-pi-piper -c ~/.config/pi-piper.conf
- Add -r option to grok-manifest to ignore specific refs when calculating
repository fingerprint. This is mostly useful for mirroring from gerrit.
diff --git a/contrib/python-grokmirror.spec b/contrib/python-grokmirror.spec
index 3c96836..9d250bb 100644
--- a/contrib/python-grokmirror.spec
+++ b/contrib/python-grokmirror.spec
@@ -59,7 +59,6 @@ Obsoletes: python-%{srcname} < 2, python2-%{srcname} < 2
%{__install} -m 0644 contrib/*.timer %{buildroot}/%{_unitdir}/
%{__install} -m 0644 contrib/logrotate %{buildroot}/%{_sysconfdir}/logrotate.d/grokmirror
%{__install} -m 0644 grokmirror.conf %{buildroot}/%{_sysconfdir}/%{srcname}/grokmirror.conf.example
-%{__install} -m 0755 contrib/pi-piper.py %{buildroot}/%{_bindir}/pi-piper
echo "d /run/%{srcname} 0755 %{username} %{groupname}" > %{buildroot}/%{_tmpfilesdir}/%{srcname}.conf
@@ -72,7 +71,7 @@ exit 0
%files -n python3-%{srcname}
%license LICENSE.txt
-%doc README.rst grokmirror.conf contrib/pi-piper.conf
+%doc README.rst grokmirror.conf pi-piper.conf
%dir %attr(0750, %{username}, %{groupname}) %{userhome}
%dir %attr(0755, %{username}, %{groupname}) %{_localstatedir}/log/%{srcname}/
%dir %attr(0755, %{username}, %{groupname}) /run/%{srcname}/
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py
index f888593..b37408f 100644
--- a/grokmirror/__init__.py
+++ b/grokmirror/__init__.py
@@ -927,16 +927,23 @@ def load_config_file(cfgfile):
sys.stderr.write(' Perhaps this is a grokmirror-1.x config file?\n')
sys.exit(1)
- toplevel = os.path.realpath(config['core'].get('toplevel'))
+ toplevel = os.path.realpath(os.path.expanduser(config['core'].get('toplevel')))
if not os.access(toplevel, os.W_OK):
logger.critical('Toplevel %s does not exist or is not writable', toplevel)
sys.exit(1)
+ # Just in case we did expanduser
+ config['core']['toplevel'] = toplevel
obstdir = config['core'].get('objstore', None)
if obstdir is None:
obstdir = os.path.join(toplevel, 'objstore')
config['core']['objstore'] = obstdir
+ # Handle some other defaults
+ manifile = config['core'].get('manifest')
+ if not manifile:
+ config['core']['manifest'] = os.path.join(toplevel, 'manifest.js.gz')
+
fstat = os.stat(cfgfile)
# stick last config file modification date into the config object,
# so we can catch config file updates
@@ -1004,7 +1011,7 @@ def init_logger(subcommand, logfile, loglevel, verbose):
logger.setLevel(logging.DEBUG)
if logfile:
- ch = logging.handlers.WatchedFileHandler(logfile)
+ ch = logging.handlers.WatchedFileHandler(os.path.expanduser(logfile))
formatter = logging.Formatter(subcommand + '[%(process)d] %(asctime)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
ch.setLevel(loglevel)
diff --git a/contrib/pi-piper.py b/grokmirror/pi-piper.py
index 59ab27f..5d5bbfb 100644..100755
--- a/contrib/pi-piper.py
+++ b/grokmirror/pi-piper.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
-# This is a ready-made post_update_hook for mirroring public-inbox repositories.
-# updated via grokmirror to arbitrary commands.
+# This is a ready-made post_update_hook script for piping messages from
+# mirrored public-inbox repositories to arbitrary commands (e.g. procmail).
#
__author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>'
@@ -36,20 +36,9 @@ def git_get_new_revs(fullpath: str, pipelast: Optional[int] = None) -> list:
if pipelast:
rev_range = '-n %d' % pipelast
else:
- try:
- with open(statf, 'r') as fh:
- latest = fh.read().strip()
- rev_range = f'{latest}..'
- except FileNotFoundError:
- logger.info('Initial run for %s', fullpath)
- args = ['rev-list', '-n', '1', 'master']
- ecode, out, err = grokmirror.run_git_command(fullpath, args)
- if ecode > 0:
- raise KeyError('Could not list revs in %s' % fullpath)
- # Just write latest into the tracking file and return nothing
- with open(statf, 'w') as fh:
- fh.write(out.strip())
- return list()
+ with open(statf, 'r') as fh:
+ latest = fh.read().strip()
+ rev_range = f'{latest}..'
args = ['rev-list', '--pretty=oneline', '--reverse', rev_range, 'master']
ecode, out, err = grokmirror.run_git_command(fullpath, args)
@@ -66,7 +55,33 @@ def git_get_new_revs(fullpath: str, pipelast: Optional[int] = None) -> list:
return newrevs
-def run_pi_repo(repo, pipedef, dryrun=False, pipelast=None):
+def reshallow(repo: str, commit_id: str) -> int:
+ with open(os.path.join(repo, 'shallow'), 'w') as fh:
+ fh.write(commit_id)
+ fh.write('\n')
+ logger.info(' prune: %s ', repo)
+ ecode, out, err = grokmirror.run_git_command(repo, ['gc', '--prune=now'])
+ return ecode
+
+
+def init_piper_tracking(repo: str, shallow: bool) -> bool:
+ logger.info('Initial setup for %s', repo)
+ args = ['rev-list', '-n', '1', 'master']
+ ecode, out, err = grokmirror.run_git_command(repo, args)
+ if ecode > 0 or not out:
+ logger.info('Could not list revs in %s', repo)
+ return False
+ # Just write latest into the tracking file and return
+ latest = out.strip()
+ statf = os.path.join(repo, 'pi-piper.latest')
+ with open(statf, 'w') as fh:
+ fh.write(latest)
+ if shallow:
+ reshallow(repo, latest)
+
+
+def run_pi_repo(repo: str, pipedef: str, dryrun: bool = False, shallow: bool = False,
+ pipelast: Optional[int] = None) -> None:
logger.info('Checking %s', repo)
sp = shlex.shlex(pipedef, posix=True)
sp.whitespace_split = True
@@ -76,6 +91,14 @@ def run_pi_repo(repo, pipedef, dryrun=False, pipelast=None):
sys.exit(1)
statf = os.path.join(repo, 'pi-piper.latest')
+ if not os.path.exists(statf):
+ if dryrun:
+ logger.info('Would have set up piper for %s [DRYRUN]', repo)
+ return
+ if not init_piper_tracking(repo, shallow):
+ logger.critical('Unable to set up piper for %s', repo)
+ return
+
try:
revlist = git_get_new_revs(repo, pipelast=pipelast)
except KeyError:
@@ -88,8 +111,12 @@ def run_pi_repo(repo, pipedef, dryrun=False, pipelast=None):
# then going through history to find the new commit-id of that
# message. Unless, of course, that's the exact message that got
# deleted in the first place. :/
+ # This also makes it hard with shallow repos, since we'd have
+ # to unshallow them first in order to find that message.
logger.critical('Assuming the repository got rebased, dropping all history.')
os.unlink(statf)
+ if not dryrun:
+ init_piper_tracking(repo, shallow)
revlist = git_get_new_revs(repo)
if not revlist:
@@ -119,18 +146,20 @@ def run_pi_repo(repo, pipedef, dryrun=False, pipelast=None):
with open(statf, 'w') as fh:
fh.write(latest_good)
logger.info('Wrote %s', statf)
+ if ecode == 0 and shallow:
+ reshallow(repo, latest_good)
sys.exit(ecode)
-def main():
+def command():
import argparse
from configparser import ConfigParser, ExtendedInterpolation
global logger
# noinspection PyTypeChecker
- op = argparse.ArgumentParser(prog='pi-piper',
+ op = argparse.ArgumentParser(prog='grok-pi-piper',
description='Pipe new messages from public-inbox repositories to arbitrary commands',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
op.add_argument('-v', '--verbose', action='store_true',
@@ -149,11 +178,12 @@ def main():
opts = op.parse_args()
- if not os.path.exists(opts.config):
- sys.stderr.write('ERORR: File does not exist: %s\n' % opts.config)
+ cfgfile = os.path.expanduser(opts.config)
+ if not cfgfile:
+ sys.stderr.write('ERORR: File does not exist: %s\n' % cfgfile)
sys.exit(1)
config = ConfigParser(interpolation=ExtendedInterpolation())
- config.read(os.path.expanduser(opts.config))
+ config.read(os.path.expanduser(cfgfile))
# Find out the section that we want from the config file
section = 'DEFAULT'
@@ -166,16 +196,18 @@ def main():
# Quick exit
sys.exit(0)
- logfile = os.path.expanduser(config[section].get('logfile'))
+ logfile = config[section].get('log')
if config[section].get('loglevel') == 'debug':
loglevel = logging.DEBUG
else:
loglevel = logging.INFO
+ shallow = config[section].getboolean('shallow', False) # noqa
+
logger = grokmirror.init_logger('pull', logfile, loglevel, opts.verbose)
- run_pi_repo(opts.repo, pipe, dryrun=opts.dryrun, pipelast=opts.pipelast)
+ run_pi_repo(opts.repo, pipe, dryrun=opts.dryrun, shallow=shallow, pipelast=opts.pipelast)
if __name__ == '__main__':
- main()
+ command()
diff --git a/grokmirror/pull.py b/grokmirror/pull.py
index ac7672f..1b2b7a0 100755
--- a/grokmirror/pull.py
+++ b/grokmirror/pull.py
@@ -511,12 +511,12 @@ def run_post_update_hook(toplevel, gitdir, hookscripts):
return
for hookscript in hookscripts.split('\n'):
- hookscript = hookscript.strip()
+ hookscript = os.path.expanduser(hookscript.strip())
sp = shlex.shlex(hookscript, posix=True)
sp.whitespace_split = True
args = list(sp)
- logger.info(' hook: %s', args[0])
+ logger.info(' hook: %s', ' '.join(args))
if not os.access(args[0], os.X_OK):
logger.warning('post_update_hook %s is not executable', hookscript)
continue
@@ -1063,7 +1063,6 @@ def pull_mirror(config, nomtime=False, forcepurge=False, runonce=False):
pull_threads = config['pull'].getint('pull_threads', 0)
if pull_threads < 1:
# take half of available CPUs by default
- logger.info('pull_threads is not set, consider setting it')
pull_threads = int(mp.cpu_count() / 2)
busy = set()
diff --git a/man/grok-pi-piper.1 b/man/grok-pi-piper.1
new file mode 100644
index 0000000..d1f3da6
--- /dev/null
+++ b/man/grok-pi-piper.1
@@ -0,0 +1,129 @@
+.\" Man page generated from reStructuredText.
+.
+.TH GROK-PI-PIPER 1 "2020-10-07" "2.0.2" ""
+.SH NAME
+GROK-PI-PIPER \- Hook script for piping new messages from public-inbox repos
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.SH SYNOPSIS
+.INDENT 0.0
+.INDENT 3.5
+grok\-pi\-piper [\-h] [\-v] [\-d] \-c CONFIG [\-l PIPELAST] [\-\-version] repo
+.UNINDENT
+.UNINDENT
+.SH DESCRIPTION
+.sp
+This is a ready\-made hook script that can be called from
+pull.post_update_hook when mirroring public\-inbox repositories. It will
+pipe all newly received messages to arbitrary commands defined in the
+config file. The simplest configuration for lore.kernel.org is:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+~/.config/pi\-piper.conf
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+[DEFAULT]
+pipe = /usr/bin/procmail
+shallow = yes
+
+~/.procmailrc
+\-\-\-\-\-\-\-\-\-\-\-\-\-
+DEFAULT=$HOME/Maildir/
+
+~/.config/lore.conf
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+[core]
+toplevel = ~/.local/share/grokmirror/lore
+log = ${toplevel}/grokmirror.log
+
+[remote]
+site = https://lore.kernel.org
+manifest = https://lore.kernel.org/manifest.js.gz
+
+[pull]
+post_update_hook = ~/.local/bin/grok\-pi\-piper \-c ~/.config/pi\-piper.conf
+include = /list\-you\-want/*
+ /another\-list/*
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+It assumes that grokmirror was installed from pip. If you installed it
+via some other means, please check the path for the grok\-pi\-piper
+script.
+.sp
+Note that the initial clone may take a long time, even if you set
+shallow=yes.
+.sp
+See pi\-piper.conf for other config options.
+.SH OPTIONS
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \-h\fP,\fB \-\-help
+show this help message and exit
+.TP
+.B \-v\fP,\fB \-\-verbose
+Be verbose and tell us what you are doing (default: False)
+.TP
+.B \-d\fP,\fB \-\-dry\-run
+Do a dry\-run and just show what would be done (default: False)
+.TP
+.BI \-c \ CONFIG\fP,\fB \ \-\-config \ CONFIG
+Location of the configuration file (default: None)
+.TP
+.BI \-l \ PIPELAST\fP,\fB \ \-\-pipe\-last \ PIPELAST
+Force pipe last NN messages in the list, regardless of tracking (default: None)
+.TP
+.B \-\-version
+show program\(aqs version number and exit
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SH SEE ALSO
+.INDENT 0.0
+.IP \(bu 2
+grok\-pull(1)
+.IP \(bu 2
+git(1)
+.UNINDENT
+.SH SUPPORT
+.sp
+Email \fI\%tools@linux.kernel.org\fP\&.
+.SH AUTHOR
+mricon@kernel.org
+
+License: GPLv3+
+.SH COPYRIGHT
+The Linux Foundation and contributors
+.\" Generated by docutils manpage writer.
+.
diff --git a/man/grok-pi-piper.1.rst b/man/grok-pi-piper.1.rst
new file mode 100644
index 0000000..18d16bb
--- /dev/null
+++ b/man/grok-pi-piper.1.rst
@@ -0,0 +1,79 @@
+GROK-PI-PIPER
+=============
+-----------------------------------------------------------
+Hook script for piping new messages from public-inbox repos
+-----------------------------------------------------------
+
+:Author: mricon@kernel.org
+:Date: 2020-10-07
+:Copyright: The Linux Foundation and contributors
+:License: GPLv3+
+:Version: 2.0.2
+:Manual section: 1
+
+SYNOPSIS
+--------
+ grok-pi-piper [-h] [-v] [-d] -c CONFIG [-l PIPELAST] [--version] repo
+
+DESCRIPTION
+-----------
+This is a ready-made hook script that can be called from
+pull.post_update_hook when mirroring public-inbox repositories. It will
+pipe all newly received messages to arbitrary commands defined in the
+config file. The simplest configuration for lore.kernel.org is::
+
+ ~/.config/pi-piper.conf
+ -----------------------
+ [DEFAULT]
+ pipe = /usr/bin/procmail
+ shallow = yes
+
+ ~/.procmailrc
+ -------------
+ DEFAULT=$HOME/Maildir/
+
+ ~/.config/lore.conf
+ -------------------
+ [core]
+ toplevel = ~/.local/share/grokmirror/lore
+ log = ${toplevel}/grokmirror.log
+
+ [remote]
+ site = https://lore.kernel.org
+ manifest = https://lore.kernel.org/manifest.js.gz
+
+ [pull]
+ post_update_hook = ~/.local/bin/grok-pi-piper -c ~/.config/pi-piper.conf
+ include = /list-you-want/*
+ /another-list/*
+
+It assumes that grokmirror was installed from pip. If you installed it
+via some other means, please check the path for the grok-pi-piper
+script.
+
+Note that the initial clone may take a long time, even if you set
+shallow=yes.
+
+See pi-piper.conf for other config options.
+
+
+OPTIONS
+-------
+ -h, --help show this help message and exit
+ -v, --verbose Be verbose and tell us what you are doing (default: False)
+ -d, --dry-run Do a dry-run and just show what would be done (default: False)
+ -c CONFIG, --config CONFIG
+ Location of the configuration file (default: None)
+ -l PIPELAST, --pipe-last PIPELAST
+ Force pipe last NN messages in the list, regardless of tracking (default: None)
+ --version show program's version number and exit
+
+
+SEE ALSO
+--------
+* grok-pull(1)
+* git(1)
+
+SUPPORT
+-------
+Email tools@linux.kernel.org.
diff --git a/contrib/pi-piper.conf b/pi-piper.conf
index 023fd1a..9d1a4d0 100644
--- a/contrib/pi-piper.conf
+++ b/pi-piper.conf
@@ -5,10 +5,18 @@
# DEFAULT=$HOME/Maildir/
# You can now read your mail with "mutt -f ~/Maildir/"
pipe = /usr/bin/procmail
-# You can use ~/ for paths in your home dir
-logfile = ~/pi-piper.log
+# Once you've successfully piped the messages, you generally
+# don't need them any more. If you set shallow = yes, then
+# the repository will be configured as "shallow" and all successfully
+# processed messages will be pruned from the repo.
+# This will greatly reduce disk space usage, especially on large archives.
+# You can always get any number of them back, e.g. by running:
+# git fetch _grokmirror master --deepen 100
+shallow = yes
+# You can use ~/ for paths in your home dir, or omit for no log
+#log = ~/pi-piper.log
# Can be "info" or "debug". Note, that debug will have message bodies as well.
-loglevel = info
+#loglevel = info
# Overrides for any defaults. You may not need any if all you want is to pipe all mirrored
# public-inboxes to procmail.
diff --git a/setup.py b/setup.py
index f72154c..1aabf48 100644
--- a/setup.py
+++ b/setup.py
@@ -64,6 +64,7 @@ setup(
"grok-fsck=grokmirror.fsck:command",
"grok-manifest=grokmirror.manifest:command",
"grok-bundle=grokmirror.bundle:command",
+ "grok-pi-piper=grokmirror.pi-piper:command",
]
}
)