author     Konstantin Ryabitsev <konstantin@linuxfoundation.org>  2021-04-01 14:42:41 -0400
committer  Konstantin Ryabitsev <konstantin@linuxfoundation.org>  2021-04-01 14:42:41 -0400
commit     cb55769aa192731d819b306d3d3904ba443d22dd (patch)
tree       4179cf245d9df88bab1e8cc548436769d85d15c8
parent     3f3a8532c0e0101c8ba7121182a756ba68713923 (diff)
download   korg-helpers-cb55769aa192731d819b306d3d3904ba443d22dd.tar.gz
Add sig-prover
Sig-prover is a tool I use to randomly spot-check mirrors for signs of tarball
corruption (or worse). It will download random kernel/git/etc tarballs from the
frontends and verify them against the signatures, alerting when there is a
verification failure.

This script is not a guaranteed mechanism for detecting an intrusion -- an
attacker can defeat it by analyzing access patterns/IPs and serving different
content when they suspect that someone is running an automated signature
verification check. The script can probably be improved by adding random delays
between retrieving the tarball and the detached signature, setting a referrer
value, etc. However, even with added measures, it will always act fairly
predictably, so there will always remain a way to defeat it.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
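For reference, a minimal sketch of a typical invocation, based on the argparse
options defined in sig-prover.py below (the config and log file names here are
just placeholders):

    ./sig-prover.py -c sig-prover.conf -l sig-prover.log

With "sleep" set in the config it keeps looping over random checks; without it,
the script exits after a single run.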
-rw-r--r--  sig-prover-keyrings/dirsigner.gpg   bin  0 -> 2746 bytes
-rw-r--r--  sig-prover-keyrings/git.gpg         bin  0 -> 26401 bytes
-rw-r--r--  sig-prover-keyrings/kernel.gpg      bin  0 -> 83426 bytes
-rw-r--r--  sig-prover-keyrings/old-kernel.gpg  bin  0 -> 86266 bytes
-rw-r--r--  sig-prover.conf                     88
-rwxr-xr-x  sig-prover.py                       406
6 files changed, 494 insertions, 0 deletions
diff --git a/sig-prover-keyrings/dirsigner.gpg b/sig-prover-keyrings/dirsigner.gpg
new file mode 100644
index 0000000..f122138
--- /dev/null
+++ b/sig-prover-keyrings/dirsigner.gpg
Binary files differ
diff --git a/sig-prover-keyrings/git.gpg b/sig-prover-keyrings/git.gpg
new file mode 100644
index 0000000..d1f3d29
--- /dev/null
+++ b/sig-prover-keyrings/git.gpg
Binary files differ
diff --git a/sig-prover-keyrings/kernel.gpg b/sig-prover-keyrings/kernel.gpg
new file mode 100644
index 0000000..f949a97
--- /dev/null
+++ b/sig-prover-keyrings/kernel.gpg
Binary files differ
diff --git a/sig-prover-keyrings/old-kernel.gpg b/sig-prover-keyrings/old-kernel.gpg
new file mode 100644
index 0000000..74cde83
--- /dev/null
+++ b/sig-prover-keyrings/old-kernel.gpg
Binary files differ
diff --git a/sig-prover.conf b/sig-prover.conf
new file mode 100644
index 0000000..b94fabe
--- /dev/null
+++ b/sig-prover.conf
@@ -0,0 +1,88 @@
+[DEFAULT]
+# Each section is weighted -- the higher the number, the more likely
+# it is to be picked during each random run
+weight = 10
+# Allows us to fake a user-agent in case the remote is returning different results
+# based on the name of sig-prover. You can have multiple entries -- a random one
+# will be chosen. By default, we use User-Agent: Sig-Prover/{version}
+#useragent = Wget/1.21.1 (linux-gnu)
+# curl/7.54.1
+# The directory with all keyrings. To generate a keyring, run:
+# gpg --no-default-keyring --keyring=./foo.gpg --import key1.asc key2.asc ...
+keyringdir = ./sig-prover-keyrings
+# We download and uncompress each tarball, so make sure you have enough room
+# in the temporary directory location.
+#tempdir = /tmp
+# If you set notify, then an email will be sent out to this address instead of just
+# printing an error message. If you want to help us out, I suggest making sure
+# that things verify properly for the first few runs, and then setting this to
+# notify admin@kernel.org on any errors (and cc yourself, if you like)
+#notify = admin@kernel.org
+#notify_cc = you@some.addr
+# Please set mailfrom if you've set "notify", as we may need to talk to you.
+#mailfrom = you@some.addr
+# you can use any modern authenticated SMTP host by setting the values below,
+# or you can just set mailhost = localhost and leave the rest commented out
+#mailhost = mail.kernel.org:587
+#mailtls = yes
+#mailuser = [some username]
+#mailpass = [some password]
+# These hosts will be concatenated with the paths defined in each section.
+# You can reduce this to just the nearest host to you.
+hosts = https://ewr.edge.kernel.org
+ https://sjc.edge.kernel.org
+ https://nrt.edge.kernel.org
+ https://ams.edge.kernel.org
+# We grab sha256sums.asc from each path defined in the sections below, which
+# is inline-signed by a special "autosigner" key. This key should NOT be added
+# to the other keyrings, as it is NOT supposed to be signing any releases
+# (just the checksums). This is VERY important.
+dirsigner_keyring = dirsigner.gpg
+# When we find an entry that ends with .tar.foo, we will look for a matching
+# unfoo entry. Check that bunzip2 is actually available on your system, as it may
+# no longer be installed by default, but old releases still need it.
+unxz = /usr/bin/unxz
+unbz2 = /usr/bin/bunzip2
+ungz = /usr/bin/gunzip
+# You can override gpg binary location if it's not /usr/bin/gpg
+#gpgbin = /usr/bin/gpg2
+# If you don't define this value, we'll exit after the first run.
+sleep = 60
+
+# Anything defined in DEFAULT is available in every section, and any of these
+# values can be overridden in the individual sections below.
+[latest]
+weight = 20
+# If we find a "json" entry, we'll use it instead of using hosts + paths + masks
+json = https://www.kernel.org/releases.json
+keyring = kernel.gpg
+
+[current-kernels]
+weight = 50
+# We append these to "hosts"
+# The starting and trailing slashes are important!
+paths = /pub/linux/kernel/v4.x/
+ /pub/linux/kernel/v5.x/
+# this is a regex that defines which files from sha256sums.asc we'll consider
+masks = linux-\d.*\.tar\..*
+keyring = kernel.gpg
+
+[git]
+weight = 10
+paths = /pub/software/scm/git/
+masks = git-\d.*\.tar\..*
+keyring = git.gpg
+
+# If you have bandwidth to spare, please check these, too.
+#[old-kernels]
+#weight = 5
+#paths = /pub/linux/kernel/v3.x/
+# /pub/linux/kernel/v2.6/
+# /pub/linux/kernel/v2.6/longterm/
+# /pub/linux/kernel/v2.6/longterm/v2.6.27/
+# /pub/linux/kernel/v2.6/longterm/v2.6.32/
+# /pub/linux/kernel/v2.6/longterm/v2.6.33/
+# /pub/linux/kernel/v2.6/longterm/v2.6.34/
+# /pub/linux/kernel/v2.6/longterm/v2.6.35/
+#masks = linux-\d.*\.tar\..*
+#keyring = old-kernel.gpg
diff --git a/sig-prover.py b/sig-prover.py
new file mode 100755
index 0000000..6d2d79b
--- /dev/null
+++ b/sig-prover.py
@@ -0,0 +1,406 @@
+#!/usr/bin/env python3
+# This script will check random content published on www.kernel.org/pub against
+# authorized signatures to identify when corruption or substitution happens. The name
+# comes from the Russian word /proveryat/, meaning "to verify".
+#
+# The script is supposed to be fire-and-forget, running in a screen session or as a
+# systemd service, with reports sent to admin@kernel.org.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# -*- coding: utf-8 -*-
+#
+__author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>'
+
+import sys
+import os
+import logging
+import argparse
+import requests
+import random
+import subprocess
+import tempfile
+import re
+import time
+import json
+
+import email
+import email.message
+import email.utils
+import smtplib
+
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+
+logger = logging.getLogger(__name__)
+REQSESSION = None
+GPGBIN = '/usr/bin/gpg'
+SEEN = dict()
+
+__VERSION__ = '0.1'
+
+
+def get_requests_session(useragent=None):
+ global REQSESSION
+ if REQSESSION is None:
+ REQSESSION = requests.session()
+ retry = Retry(connect=3, backoff_factor=1)
+ adapter = HTTPAdapter(max_retries=retry)
+ REQSESSION.mount('http://', adapter)
+ REQSESSION.mount('https://', adapter)
+ if useragent is None:
+ useragent = f'Sig-Prover/{__VERSION__}'
+
+ headers = {
+ 'User-Agent': useragent,
+ }
+ REQSESSION.headers.update(headers)
+
+ return REQSESSION
+
+
+def get_random_target(config, rsect):
+ global SEEN
+ if rsect not in SEEN:
+ SEEN[rsect] = set()
+
+ ua = config[rsect].get('useragent')
+ if ua:
+ ua = random.choice(ua.split('\n'))
+ rses = get_requests_session(useragent=ua)
+ candidates = list()
+
+ # Is it a releases.json, or a collection of hosts and paths?
+ jurl = config[rsect].get('json')
+ if jurl:
+ logger.info(' retrieving %s', jurl)
+ resp = rses.get(jurl)
+ resp.raise_for_status()
+ rels = json.loads(resp.content)
+ for release in rels['releases']:
+ if not release['pgp']:
+ continue
+ candidate = release['source']
+ # Do we define hosts?
+ hosts = config[rsect].get('hosts')
+ if hosts and candidate.find('https://cdn') == 0:
+ # Swap in the CDN URL with an actual host URL, as it doesn't
+ # really make sense to check things over cdn cache which we don't
+ # control and can't do anything about.
+ for rhost in config[rsect].get('hosts').split('\n'):
+ hostcand = candidate.replace('https://cdn.kernel.org', rhost)
+ if hostcand not in SEEN[rsect]:
+ candidate = hostcand
+ break
+
+ if candidate in SEEN[rsect]:
+ logger.debug('Already checked %s in this session', candidate)
+ continue
+ candidates.append(candidate)
+
+ else:
+ # Grab a random host
+ rhost = random.choice(config[rsect].get('hosts').split('\n'))
+ # Grab a random path
+ rpath = random.choice(config[rsect].get('paths').split('\n'))
+ rurl = rhost + rpath
+        # Now we grab the sha256sums.asc file from there
+ shapath = rurl + 'sha256sums.asc'
+ logger.info(' retrieving %s', shapath)
+ resp = rses.get(shapath)
+ resp.raise_for_status()
+
+ keyring = os.path.join(config[rsect].get('keyringdir'), config[rsect].get('dirsigner_keyring'))
+ logger.info(' verifying')
+ gpgargs = ['--verify', '--status-fd=2', '-']
+ ecode, out, err = gpg_run_command(gpgargs, keyring, stdin=resp.content)
+ if ecode == 0:
+ good, valid, created, errors = validate_gpg_signature(err.decode())
+ if good and valid:
+ logger.info(' checksums signature is good and valid (created: %s)', created)
+ else:
+ errors = err.decode().split('\n')
+
+ if errors:
+ report_badness(config[rsect], shapath, errors)
+
+ rmask = random.choice(config[rsect].get('masks').split('\n'))
+ for line in resp.content.split(b'\n'):
+ if re.search(rmask.encode(), line):
+ filen = line.split()[1].decode()
+ candidate = rurl + filen
+ if candidate in SEEN[rsect]:
+ logger.debug('Already checked %s in this session', candidate)
+ continue
+ candidates.append(rurl + filen)
+
+ if not candidates:
+ logger.debug('Already tried all possible choices for %s', rsect)
+ candidates = list(SEEN[rsect])
+ SEEN[rsect] = set()
+
+ if not candidates:
+ logger.info('No suitable candidates found for %s', rsect)
+ return None
+
+ candidate = random.choice(candidates)
+ SEEN[rsect].add(candidate)
+ return candidate
+
+
+def _run_command(cmdargs, stdin=None):
+ logger.debug('Running %s' % ' '.join(cmdargs))
+
+ sp = subprocess.Popen(cmdargs,
+ stdout=subprocess.PIPE,
+ stdin=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ (output, error) = sp.communicate(input=stdin)
+
+ return sp.returncode, output, error
+
+
+def gpg_run_command(args, keyring, stdin=None):
+ cmdargs = [GPGBIN, '--batch', '--no-auto-key-retrieve', '--no-auto-check-trustdb', '--no-default-keyring',
+ '--keyring', keyring]
+ cmdargs += args
+
+ return _run_command(cmdargs, stdin=stdin)
+
+
+def validate_gpg_signature(output):
+ good = False
+ valid = False
+ created = None
+ errors = set()
+ gs_matches = re.search(r'^\[GNUPG:] GOODSIG ([0-9A-F]+)\s+.*$', output, re.M)
+ if gs_matches:
+ logger.debug(' GOODSIG')
+ good = True
+ keyid = gs_matches.groups()[0]
+ vs_matches = re.search(r'^\[GNUPG:] VALIDSIG ([0-9A-F]+) (\d{4}-\d{2}-\d{2}) (\d+)', output, re.M)
+ if vs_matches:
+ logger.debug(' VALIDSIG')
+ valid = True
+ created = vs_matches.groups()[1]
+ else:
+ errors.add('Signature not valid from key: %s' % keyid)
+ else:
+ # Are we missing a key?
+ matches = re.search(r'^\[GNUPG:] NO_PUBKEY ([0-9A-F]+)$', output, re.M)
+ if matches:
+ errors.add('Missing public key: %s' % matches.groups()[0])
+ # Is the key expired?
+ matches = re.search(r'^\[GNUPG:] EXPKEYSIG (.*)$', output, re.M)
+ if matches:
+ errors.add('Expired key: %s' % matches.groups()[0])
+
+ return good, valid, created, errors
+
+
+def report_badness(config, furl, errors):
+ if not config.get('notify'):
+ logger.critical('ERROR: failed verifying: %s', furl)
+ for entry in errors:
+ logger.critical(' %s', entry)
+ logger.debug('WARNING: notify not set, not sending a mail report')
+ sys.exit(1)
+
+ logger.info('ERROR: failed verifying: %s', furl)
+ msg = email.message.Message()
+
+ # Set to and cc
+ msg['To'] = config.get('notify')
+ targets = [msg['To']]
+
+ ccs = config.get('notify_cc', '')
+ if ccs:
+ msg['Cc'] = ccs
+ targets += [x.strip() for x in ccs.split(',')]
+
+ msg['Subject'] = f'SIGFAIL: {furl}'
+ msg['From'] = config.get('mailfrom', 'devnull@kernel.org')
+
+ msg['Message-Id'] = email.utils.make_msgid('sig-prover')
+ msg['Date'] = email.utils.formatdate(localtime=True)
+
+ body = list()
+ body.append('Hello:')
+ body.append('')
+ body.append('The following URL failed signature verification:')
+ body.append(f' {furl}')
+ body.append('')
+ body.append('Errors:')
+ for error in errors:
+ body.append(f' {error}')
+
+ msg.set_payload('\r\n'.join(body))
+
+ logger.debug('Message follows')
+ logger.debug(msg.as_string())
+
+ mailhost = config.get('mailhost', 'localhost')
+
+ try:
+        server = smtplib.SMTP(mailhost)
+ if config.getboolean('mailtls'):
+ server.starttls()
+
+ muser = config.get('mailuser')
+ mpass = config.get('mailpass')
+ if muser and mpass:
+ server.login(muser, mpass)
+
+ logger.info('Sending mail to %s', ', '.join(targets))
+ server.sendmail(msg['From'], targets, msg.as_string())
+ server.close()
+ except Exception as ex: # noqa
+ logger.critical('Unable to send mail to %s', ', '.join(targets))
+ logger.critical('Attempting to use %s returned:', mailhost)
+ logger.critical(ex)
+
+
+def verify_tarball(config, turl):
+ # Try the exact filename + .sign first
+ signurl = turl + '.sign'
+ rses = get_requests_session()
+ resp = rses.get(signurl)
+ zext = None
+ zbin = None
+ if resp.status_code > 200:
+ # Try dropping the last .foo and trying again
+ parts = turl.rsplit('.', 1)
+ signurl = parts[0] + '.sign'
+ zext = parts[1]
+ # Are we capable of dealing with zext?
+ zbin = config.get(f'un{zext}')
+ if not zbin:
+ logger.critical('Not aware of how to deal with %s compression', zext)
+ sys.exit(1)
+ logger.debug('Will use %s for uncompression', zbin)
+ resp = rses.get(signurl)
+ resp.raise_for_status()
+ logger.info(' retrieving %s', signurl)
+ with tempfile.TemporaryDirectory(suffix='.sig-prover', dir=config.get('tempdir', '/tmp')) as td:
+ signfile = os.path.join(td, 'content.sig')
+ with open(signfile, 'wb') as sfh:
+ sfh.write(resp.content)
+ resp.close()
+ logger.info(' retrieving %s', turl)
+ resp = rses.get(turl, stream=True)
+ resp.raise_for_status()
+ contentfile = os.path.join(td, 'content')
+ if zext:
+ contentfile = f'{contentfile}.{zext}'
+ with open(contentfile, 'wb') as cfh:
+ for chunk in resp.iter_content(chunk_size=8192):
+ cfh.write(chunk)
+ resp.close()
+ if zext:
+ logger.info(' uncompressing')
+ cmdargs = [zbin, contentfile]
+ ecode, out, err = _run_command(cmdargs)
+ if ecode > 0:
+ logger.critical('Error uncompressing %s', turl)
+ sys.exit(1)
+ contentfile = os.path.join(td, 'content')
+ logger.info(' verifying')
+ gpgargs = ['--verify', '--status-fd=2', signfile, contentfile]
+ keyring = os.path.join(config.get('keyringdir'), config.get('keyring'))
+ ecode, out, err = gpg_run_command(gpgargs, keyring=keyring)
+ if ecode == 0:
+ good, valid, created, errors = validate_gpg_signature(err.decode())
+ if good and valid:
+ logger.info(' signature is good and valid (created: %s)', created)
+ return
+ else:
+ errors = err.decode().split('\n')
+
+ report_badness(config, turl, errors)
+
+
+def get_random_sect(config):
+ global GPGBIN
+ sects = list(config.sections())
+ weights = list()
+ for sect in sects:
+ weights.append(config[sect].getint('weight', 10))
+
+ rsect = random.choices(sects, weights=weights, k=1)[0]
+ if config[rsect].get('gpgbin'):
+ GPGBIN = config[rsect].get('gpgbin')
+
+ return rsect
+
+
+def sig_verify(config):
+ rsect = get_random_sect(config)
+ logger.info('[%s]', rsect)
+ try:
+ target = get_random_target(config, rsect)
+ if target:
+ verify_tarball(config[rsect], target)
+ except requests.exceptions.RequestException as ex:
+ # Treat failures as non-critical, because hosts can be intermittently
+ # unreachable for various reasons.
+ logger.info('Failed getting remote content:')
+ logger.info(ex)
+
+ return config[rsect].getint('sleep', 0)
+
+
+def read_config(cfgfile):
+ from configparser import ConfigParser, ExtendedInterpolation
+ if not os.path.exists(cfgfile):
+ sys.stderr.write('ERROR: config file %s does not exist' % cfgfile)
+ sys.exit(1)
+ fconfig = ConfigParser(interpolation=ExtendedInterpolation())
+ fconfig.read(cfgfile)
+
+ return fconfig
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-c', '--config-file', dest='cfgfile', required=True,
+ help='Config file to use')
+ parser.add_argument('-q', '--quiet', dest='quiet', action='store_true', default=False,
+ help='Quiet operation (cron mode)')
+ parser.add_argument('-d', '--debug', dest='debug', action='store_true', default=False,
+ help='Output debug information')
+ parser.add_argument('-l', '--logfile', dest='logfile',
+ help='Record activity in this log file')
+
+ _cmdargs = parser.parse_args()
+ _config = read_config(_cmdargs.cfgfile)
+ logger.setLevel(logging.DEBUG)
+
+ if _cmdargs.logfile:
+ ch = logging.FileHandler(_cmdargs.logfile)
+        formatter = logging.Formatter('[%(asctime)s] %(message)s')
+ ch.setFormatter(formatter)
+ ch.setLevel(logging.INFO)
+ logger.addHandler(ch)
+
+ ch = logging.StreamHandler()
+ formatter = logging.Formatter('%(message)s')
+ ch.setFormatter(formatter)
+ if _cmdargs.quiet:
+ ch.setLevel(logging.CRITICAL)
+ elif _cmdargs.debug:
+ ch.setLevel(logging.DEBUG)
+ else:
+ ch.setLevel(logging.INFO)
+ logger.addHandler(ch)
+
+ while True:
+ sleep = sig_verify(_config)
+ if not sleep:
+ break
+ logger.info('--- sleeping %s seconds ---', sleep)
+ try:
+ time.sleep(sleep)
+ except KeyboardInterrupt:
+ logger.info('Bye')
+ sys.exit(0)
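
The header comment above mentions running this fire-and-forget in a screen
session or as a systemd service. A minimal unit sketch along those lines,
assuming the script and config are installed under /usr/local (the unit name,
paths, and user are placeholders, not part of this commit):

    # Hypothetical sketch; adjust paths and user to your installation.
    [Unit]
    Description=sig-prover mirror signature checker
    After=network-online.target
    Wants=network-online.target

    [Service]
    Type=simple
    ExecStart=/usr/local/bin/sig-prover.py -c /usr/local/etc/sig-prover.conf
    Restart=on-failure

    [Install]
    WantedBy=multi-user.target

When run unattended like this, you would normally also set notify and mailfrom
in the config so verification failures are mailed rather than only logged.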