diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2023-04-04 12:26:12 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2023-04-04 12:26:12 -0400 |
commit | 48d62f445304dc536846a5a8235ca71d4db66c90 (patch) | |
tree | 16a3b07fa8829a5ebe256711fa576c925cf7576d | |
parent | 80ecf568a279c753c1adf1767c20d9fc080b68f0 (diff) | |
download | peebz-48d62f445304dc536846a5a8235ca71d4db66c90.tar.gz |
pi2bz: implement more accurate trigger for commands
We use pi_query when looking for interesting threads, but it can produce
false positives (e.g. someone merely discussing bugbot trigger words rather
than actually issuing bot commands).
This implements pi_trigger_regexes so we can distinguish real commands (each
issued on a line of its own) from false positives.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | TODO.otl | 14 | ||||
-rw-r--r-- | default.config.toml | 4 | ||||
-rw-r--r-- | peebz/__init__.py | 16 | ||||
-rw-r--r-- | peebz/parse.py | 58 | ||||
-rw-r--r-- | peebz/pi2bz.py | 19 |
5 files changed, 87 insertions, 24 deletions
diff --git a/TODO.otl b/TODO.otl new file mode 100644 index 0000000..055f0bc --- /dev/null +++ b/TODO.otl @@ -0,0 +1,14 @@ +Bugzilla-to-PI +---------------------------------------- +[_] Newly added comments without an (In reply to) should thread the latest comment +[_] Collect all recipients for the initial bug tracking notification + + +PI-to-Bugzilla +---------------------------------------- +[X] Trigger bugbot on more precise phrases + + +Documentation +---------------------------------------- +[_] Write basic README diff --git a/default.config.toml b/default.config.toml index 993b9af..38905c4 100644 --- a/default.config.toml +++ b/default.config.toml @@ -33,7 +33,9 @@ new_bug_send_notification = true pi_query = '(nq:"bugbot on" OR nq:"bugbot assign")' pi_must_bz_groups = ['editbugs'] pi_url = 'https://lore.kernel.org/all/' -pi_assign_regex = '^bugbot assign to (\S+)' +# These are always multiline, case-insensitive +pi_trigger_regexes = ['^bugbot on\s*$', '^bugbot assign to \S+$'] +pi_assign_regexes = ['^bugbot assign to (\S+)'] bz_new_bugs_quicksearch = 'OPEN flag:bugbot+' bz_privacy_mode = true alwayscc = ['bugs@lists.linux.dev'] diff --git a/peebz/__init__.py b/peebz/__init__.py index d9dc238..eb24111 100644 --- a/peebz/__init__.py +++ b/peebz/__init__.py @@ -636,8 +636,7 @@ def msg_get_author(msg: email.message.EmailMessage) -> Tuple[str, str]: return author -def msg_parse_for_bug(msg: email.message.EmailMessage) -> Tuple[str, Tuple[str, str], str, str, List[Dict]]: - msgid = b4.LoreMessage.get_clean_msgid(msg) +def msg_get_payload(msg: email.message.EmailMessage, strip_quoted: bool = False) -> str: mp = msg.get_body(preferencelist=('plain',)) bbody = mp.get_payload(decode=True) cs = mp.get_content_charset() @@ -647,6 +646,19 @@ def msg_parse_for_bug(msg: email.message.EmailMessage) -> Tuple[str, Tuple[str, # Strip signature if we find it chunks = cpay.rsplit('\n-- \n', maxsplit=1) cbody = chunks[0] + if not strip_quoted: + return cbody + + stripped = list() 
+ for line in cbody.splitlines(): + if not line.startswith('> '): + stripped.append(line) + return '\n'.join(stripped) + + +def msg_parse_for_bug(msg: email.message.EmailMessage) -> Tuple[str, Tuple[str, str], str, str, List[Dict]]: + msgid = b4.LoreMessage.get_clean_msgid(msg) + cbody = msg_get_payload(msg) lsub = b4.LoreSubject(msg.get('Subject', '')) subject = lsub.subject atts = msg_get_valid_attachments(msg) diff --git a/peebz/parse.py b/peebz/parse.py index 07de9e5..f6785cc 100644 --- a/peebz/parse.py +++ b/peebz/parse.py @@ -9,7 +9,7 @@ import peebz import b4 import re -from typing import Tuple, Dict +from typing import Tuple, Dict, List import email.message import email.utils @@ -116,6 +116,31 @@ def new_comment_from_msg(bid: int, cid: int, msg: email.message.EmailMessage, dr return cid +def get_assignee(msg: email.message.EmailMessage, regexes: List[str]) -> str: + payload = peebz.msg_get_payload(msg) + fromaddr = peebz.msg_get_author(msg)[1] + assignee = None + + for regex in regexes: + matches = re.search(regex, payload, flags=re.I | re.M) + if matches: + assignee = matches.groups()[0] + if assignee == 'me': + logger.debug('me=%s', fromaddr) + assignee = fromaddr + # Does this user exist? + try: + peebz.bz_get_user(assignee) + logger.debug('found assignee=%s (matched regex: %s)', assignee, regex) + # First match wins + break + except LookupError: + logger.info('Unable to assign to %s: no such user', assignee) + assignee = None + + return assignee + + def process_rfc2822(msg: email.message.EmailMessage, product: str, component: str, dry_run: bool = False) -> None: # Ignore any messages that have an X-Bugzilla-Product header, @@ -159,28 +184,21 @@ def process_rfc2822(msg: email.message.EmailMessage, product: str, component: st new_bug_notification(bid, cid, dry_run=dry_run) # Do we have any assign triggers? 
- assign_re = cconf.get('pi_assign_regex') - if assign_re: - matches = re.search(assign_re, msg.as_string(), flags=re.I | re.M) - if matches: + assign_res = cconf.get('pi_assign_regexes') + if assign_res: + assignee = get_assignee(msg, assign_res) + if assignee: + # Is this person allowed to set assignees? author = peebz.msg_get_author(msg) fromaddr = author[1] if peebz.bz_check_user_allowed(fromaddr, product, component): - assign_to = matches.groups()[0] - if assign_to == 'me': - logger.debug('me=%s', fromaddr) - assign_to = fromaddr - # Does this user exist? - try: - peebz.bz_get_user(assign_to) - if not dry_run: - peebz.bz_assign_bug(bid, assign_to) - else: - logger.debug('---DRY RUN---') - logger.debug('Would have assigned bid=%s to %s', bid, assign_to) - - except LookupError: - logger.info('Unable to assign %s to %s: no such user', bid, assign_to) + if not dry_run: + peebz.bz_assign_bug(bid, assignee) + else: + logger.debug('---DRY RUN---') + logger.debug('Would have assigned bid=%s to %s', bid, assignee) + else: + logger.debug('User %s is not allowed to set assignees', fromaddr) def main(cmdargs: argparse.Namespace) -> None: diff --git a/peebz/pi2bz.py b/peebz/pi2bz.py index 36f07d3..240f64c 100644 --- a/peebz/pi2bz.py +++ b/peebz/pi2bz.py @@ -11,6 +11,7 @@ import urllib.parse import email.message import gzip import datetime +import re from typing import List, Set @@ -153,8 +154,24 @@ def update_component(product: str, component: str, dry_run: bool = False): author = peebz.msg_get_author(msg) fromaddr = author[1] if not peebz.bz_check_user_allowed(fromaddr, product, component): - logger.debug('skipping msg %s', msg.get('Subject')) + logger.debug('author=%s not allowed, skipping msg %s', fromaddr, msg.get('Subject')) continue + # Check fine trigger, if configured + trigger_res = cconf.get('pi_trigger_regexes', list()) + if trigger_res: + payload = peebz.msg_get_payload(msg) + found = False + for trigger_re in trigger_res: + matches = re.search(trigger_re, 
payload, flags=re.I | re.M) + if matches: + logger.debug('found trigger_regex: %s', trigger_re) + found = True + break + + if not found: + logger.debug('trigger_regexes not found, skipping msg %s', msg.get('Subject')) + continue + # Retrieve and queue up the entire thread try: tmsgs = get_sorted_thread(url, msgid) |