aboutsummaryrefslogtreecommitdiffstats
path: root/main.py
diff options
context:
space:
mode:
authorDaniel Smith <daniel.smith@qt.io>2024-07-26 10:48:16 +0200
committerDaniel Smith <daniel.smith@qt.io>2024-07-26 10:48:16 +0200
commitf7c6fc29ca6484712086fecb3dc54dddea86f017 (patch)
tree6ef558cc06ea24518746fd351a949e2f3839b873 /main.py
Initial commit for QtFlake8Bot\n\nTask-number: QTQAINFRA-6468
Diffstat (limited to 'main.py')
-rw-r--r--main.py344
1 files changed, 344 insertions, 0 deletions
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..0f71243
--- /dev/null
+++ b/main.py
@@ -0,0 +1,344 @@
+# Copyright (C) 2023 The Qt Company Ltd.
+# Contact: https://www.qt.io/licensing/
+#
+# You may use this file under the terms of the 3-clause BSD license.
+# See the file LICENSE in qt/qtrepotools for details.
+#
+
""" This script listens for incoming webhook requests of patchset-created type
    from Gerrit, checks out the patch locally, and runs flake8 on it.
    It then posts a comment for each issue identified to Gerrit with the results.
"""
+
+import json
+import os
+import sys
+import asyncio
+import base64
+import fnmatch
+import atexit
+from urllib.parse import urlparse
+from urllib.request import urlopen, Request
+from urllib.error import HTTPError, URLError
+import logging
+from systemd.journal import JournalHandler
+import tempfile
+
+from aiohttp import web
+
# Log to the systemd journal so the bot's output shows up in journalctl.
log = logging.getLogger('flake8_bot')
log.addHandler(JournalHandler())
log.setLevel(logging.INFO)


# Gerrit HTTP credentials are supplied via the environment; the bot cannot
# post reviews without them, so bail out early when they are missing.
GERRIT_USERNAME = os.environ.get('GERRIT_USERNAME')
GERRIT_PASSWORD = os.environ.get('GERRIT_PASSWORD')

if not GERRIT_USERNAME or not GERRIT_PASSWORD:
    # Missing credentials is a fatal misconfiguration, not an informational
    # event, so log it at error level before exiting.
    log.error('Please set the GERRIT_USERNAME and GERRIT_PASSWORD environment variables.')
    sys.exit(1)

# Pre-compute the HTTP Basic auth token: base64("username:password").
GERRIT_AUTH = base64.b64encode(
    f'{GERRIT_USERNAME}:{GERRIT_PASSWORD}'.encode('utf-8')).decode('utf-8')
+
class Lock:
    """ Mutual-exclusion lock guarding the local git repo.

    The Gerrit webhook handler is asynchronous, so several requests may be
    in flight at once; this serializes their access to the checkout.
    Internally wraps asyncio.Lock instead of the previous flag-plus-sleep
    busy wait, which polled only once per second and so added up to a full
    second of latency to every queued request.
    """
    def __init__(self):
        self._lock = asyncio.Lock()

    @property
    def locked(self):
        """ True while the lock is held (kept for API compatibility). """
        return self._lock.locked()

    async def acquire(self):
        """ Wait until the lock is free, then take it. """
        await self._lock.acquire()

    def release(self):
        """ Release the lock. A no-op if it is not currently held, matching
        the previous implementation's idempotent release. """
        if self._lock.locked():
            self._lock.release()

semaphore = Lock()
+
async def clone_repo(data):
    """ Clone the change's repository and check out its target branch.

    Args:
        data: the webhook payload, augmented with 'repo_name' (the local
              clone directory) by the request handler.

    Raises:
        RuntimeError: if the git clone fails.
    """
    log.info("Cloning repo %s", data['change']['project'])
    if os.path.exists(data['repo_name']):
        # The repo was cloned by an earlier request; reuse it as-is.
        return
    repo_url = "https://codereview.qt-project.org/" + data['change']['project'] + ".git"
    proc = await asyncio.create_subprocess_exec('git', 'clone', repo_url, data['repo_name'])
    await proc.communicate()
    if proc.returncode != 0:
        # Fail loudly here instead of letting the chdir below raise a
        # confusing FileNotFoundError when the clone directory is missing.
        raise RuntimeError(
            f"git clone of {repo_url} failed with exit code {proc.returncode}")
    os.chdir(data['repo_name'])
    try:
        proc = await asyncio.create_subprocess_exec('git', 'checkout',
                                                    data['change']['branch'])
        await proc.communicate()
    finally:
        # Always restore the working directory so later requests start from
        # a known location even if the checkout raises.
        os.chdir('..')
+
+
async def checkout_patch(data):
    """ Check out the patchset under review inside the local clone. """

    log.info("%s: Checking out patch", data['change']['number'])
    os.chdir(data['repo_name'])
    # Wipe untracked files, fetch the patchset ref, then move to it.
    commands = (
        ('git', 'clean', '-fdx'),
        ('git', 'fetch', 'origin', data['patchSet']['ref']),
        ('git', 'checkout', 'FETCH_HEAD'),
    )
    for argv in commands:
        proc = await asyncio.create_subprocess_exec(*argv)
        await proc.communicate()
    os.chdir('..')
+
+
async def _list_changed_files():
    """ Return the names of files added or modified by FETCH_HEAD.
    Deleted files are skipped since there is nothing left to lint. """
    p = await asyncio.create_subprocess_exec('git', 'diff-tree', '--no-commit-id',
                                             '--name-status', '-r', 'FETCH_HEAD',
                                             stdout=asyncio.subprocess.PIPE)
    stdout, _ = await p.communicate()
    changed_files = []
    for line in stdout.decode().split('\n'):
        # Each line is "<status>\t<path>"; skip empty lines and deletions.
        if len(line) > 0 and not line.startswith("D"):
            changed_files.append(line.split('\t')[1])
    return changed_files


def _write_default_config():
    """ Write the built-in fallback flake8 config to a temporary file and
    return its path. The caller is responsible for removing the file. """
    with tempfile.NamedTemporaryFile('w', encoding='utf-8', suffix='.flake8',
                                     delete=False) as f:
        f.write(
            """[flake8]
ignore = E115,E265,W503
max-line-length = 100
exclude = rc_*.py,*_rc.py,ui_*.py
per-file-ignores =
    # for init_test_paths() hack
    *_test_*.py:E402
    __init__.py:F401,E402
"""
        )
        return f.name


async def run_flake8(data):
    """ Run flake8 on every Python file touched by the patch.

    Returns:
        (comments_per_file, checked_file_count) where comments_per_file maps
        file names to lists of {'line': int, 'message': str} dicts, or
        (None, 0) when no Python files were changed.
    """

    log.info("%s: Running flake8", data['change']['number'])
    comments_per_file = {}
    os.chdir(data['repo_name'])
    fallback_config = False
    # Prefer the .flake8 file shipped in the repo itself.
    flake8_config = os.path.join(os.getcwd(), '.flake8')
    try:
        changed_files = await _list_changed_files()

        # Generated files that should never be linted.
        ignore_patterns = ["rc_*.py", "*_rc.py", "ui_*.py"]

        log.info("Trying flake8 config: %s", flake8_config)
        if not os.path.exists(flake8_config):
            fallback_config = True
            log.warning("No .flake8 file found. Using default config.")
            flake8_config = _write_default_config()

        checked_file_count = 0
        for file in changed_files:
            if not file.endswith('.py'):
                continue  # Don't call flake8 on non-python files
            if any(fnmatch.fnmatch(file, pattern) for pattern in ignore_patterns):
                continue  # Skip this file if it matches any of the other ignore patterns

            checked_file_count += 1

            # ";;" is used as a field separator because flake8 messages may
            # themselves contain colons.
            process = await asyncio.create_subprocess_exec(
                'flake8',
                '--config=' + flake8_config,
                '--format=%(path)s;;%(row)d;;%(code)s;;%(text)s',
                file,
                stdout=asyncio.subprocess.PIPE
            )
            stdout, _ = await process.communicate()
            output = stdout.decode()
            log.debug("Flake8 output: %s", output)
            for line in output.split('\n'):
                components = line.split(';;')
                if len(components) != 4:
                    continue
                file_name, line_number, error_code, error_message = components
                # setdefault avoids the KeyError the previous guard could hit
                # when flake8 reported a path missing from changed_files (the
                # old code only created the key for known files but appended
                # unconditionally).
                comments_per_file.setdefault(file_name, []).append(
                    # Gerrit's CommentInput expects the line as an integer.
                    {'line': int(line_number),
                     'message': error_code + ': ' + error_message})
    finally:
        # Clean up the temporary config and restore the working directory
        # even when git or flake8 raises.
        if fallback_config and os.path.exists(flake8_config):
            os.remove(flake8_config)
        os.chdir('..')

    if checked_file_count == 0:
        log.info("%s: No python files changed.",
                 data['change']['number'])
        return None, 0
    log.info("%s: Comments: %s", data['change']['number'], json.dumps(comments_per_file))
    return comments_per_file, checked_file_count
+
+
def generate_review(comments_per_file, change_number):
    """ Build a Gerrit ReviewInput dict from the flake8 findings.

    With no findings the review approves with Sanity-Review +1; otherwise it
    votes Code-Review -1 and attaches inline comments, dropping any comment
    that was already ported over from a previous patchset.
    """

    log.info("Generating review")
    if not comments_per_file:
        return {
            'message': "No flake8 issues found. Looks good.",
            'labels': {'Sanity-Review': 1},
            'tag': "autogenerated:flake8"
        }

    ported_comments = fetch_ported_comments(change_number)

    def _already_ported(path, comment):
        """ True when an identical comment exists among the ported ones. """
        for prior in (ported_comments or {}).get(path) or []:
            if prior['message'] == comment['message'] and \
                    (prior['line'] == comment.get('line') or not comment.get('line')):
                log.debug("Skipping duplicate comment: %s", comment['message'])
                return True
        return False

    comment_inputs = {}
    for path, comments in comments_per_file.items():
        for comment in comments:
            if _already_ported(path, comment):
                continue
            comment_inputs.setdefault(path, []).append({
                'line': comment['line'],
                'message': comment['message'],
                'unresolved': 'true'
            })

    if comment_inputs:
        message = "Flake8 identified issues in this change."
    else:
        # Every new finding duplicated a ported comment: nag, don't repeat.
        message = "Flake8 identified issues which remain unresolved from a previous patchset." \
            " Please address those issues."
    return {
        'message': message,
        'comments': comment_inputs,
        'labels': {'Code-Review': -1}
    }
+
def fetch_ported_comments(changeId):
    """ Fetch comments ported from earlier patchsets via the Gerrit REST API.

    Args:
        changeId: the numeric Gerrit change number.

    Returns:
        A dict mapping file names to lists of CommentInfo dicts, or None
        when the request fails.
    """

    log.info("%s: Fetching ported comments", changeId)
    url = f"https://codereview.qt-project.org/a/changes/{changeId}/revisions/current/ported_comments"
    headers = {'Content-Type': 'application/json;charset=UTF-8',
               'Authorization': 'Basic ' + GERRIT_AUTH}
    req = Request(url, headers=headers)
    try:
        # Close the connection deterministically instead of leaking it.
        with urlopen(req) as response:
            body = response.read().decode('utf-8')
    except HTTPError as e:
        log.info('Error fetching ported comments: %s %s', str(e.code), e.reason)
    except URLError as e:
        log.info('Error fetching ported comments: %s', str(e.reason))
    else:
        # Gerrit prefixes JSON responses with ")]}'" against XSSI; strip it.
        data = json.loads(body.replace(")]}'", ''))
        log.info("%s: Ported comments: %s", changeId, json.dumps(data))
        return data
    return None
+
async def post_review(data, review, retry=0):
    """ Post the review to Gerrit, retrying on 409 lock failures.

    Args:
        data: the webhook payload (supplies change number and revision).
        review: the ReviewInput dict to send.
        retry: current attempt count; gives up after 10 retries.
    """

    change_number = data['change']['number']
    log.info("%s: Posting review", change_number)
    revision = data['patchSet']['revision']
    url = f"https://codereview.qt-project.org/a/changes/{change_number}/revisions/{revision}/review"
    review_data = json.dumps(review).encode('utf-8')
    headers = {'Content-Type': 'application/json;charset=UTF-8',
               'Authorization': 'Basic ' + GERRIT_AUTH}
    req = Request(url, review_data, headers)
    log.info('%s: Review data: %s', change_number, review_data)
    try:
        # Use a context manager so the connection is always closed.
        with urlopen(req):
            pass
    except HTTPError as e:
        # The old code had this log line commented out, silently swallowing
        # every non-409 HTTP error; surface them again.
        log.info('Error posting review: %s %s', str(e.code), e.reason)
        if e.code == 409 and retry < 10:
            # Gerrit lock failure. Try again for up to 10 times recursively.
            log.info('%s: Retrying due to 409 Lock Failure...', change_number)
            await asyncio.sleep(5)
            await post_review(data, review, retry + 1)
    except URLError as e:
        log.info('Error posting review: %s', str(e.reason))
    else:
        log.info('%s: Review posted successfully.', change_number)
+
+
async def handle(request):
    """ Handle an incoming Gerrit webhook request.

    Filters for patchset-created events on open changes in pyside repos,
    then runs flake8 under the repo lock and posts the result as a review.
    Always answers 200 so Gerrit does not keep re-delivering the webhook.
    """
    body = await request.text()
    data = json.loads(body)

    # Check the event type first: non-change events (e.g. ref-updated) carry
    # no 'change' object, and reading it before this check raised KeyError.
    if data.get('type') != 'patchset-created':
        return web.Response(status=200)

    # Make sure the change is in state NEW
    if data['change']['status'] != 'NEW':
        return web.Response(status=200)

    # Only act on pyside repos.
    if not data['change']['project'].startswith('pyside'):
        return web.Response(status=200)

    # The last path component of the project name is the local clone dir.
    data['repo_name'] = urlparse(data['change']['project']).path.split('/')[-1]

    log.info("%s: Received webhook for %s", data['change']['number'], data['patchSet']['revision'])

    # Request a lock on the git repo
    try:
        log.info("%s: Acquiring lock", data['change']['number'])
        await semaphore.acquire()
        await clone_repo(data)
        await checkout_patch(data)
        issues, file_count = await run_flake8(data)
    except Exception as e:
        # Still answer 200: a failure here is ours, not Gerrit's.
        log.error("Error: %s", str(e))
        return web.Response(status=200)
    finally:
        log.info("%s: Releasing lock", data['change']['number'])
        semaphore.release()

    # create a review with the comments if any python files were reviewed
    if file_count > 0:
        review = generate_review(issues, data['change']['number'])
        await post_review(data, review)

    return web.Response(status=200)
+
+
async def run_web_server():
    """ Start the aiohttp server listening for Gerrit webhooks. """
    app = web.Application()
    app.add_routes([web.post('/', handle)])
    runner = web.AppRunner(app)
    await runner.setup()
    # PORT arrives from the environment as a string; TCPSite expects an
    # integer port number, so convert (falling back to 8088).
    port = int(os.environ.get("PORT") or 8088)
    site = web.TCPSite(runner, 'localhost', port)
    await site.start()
    log.info("Web server started on port %s", port)
+
# Run the webhook server on a dedicated event loop, forever; the task is
# scheduled now and starts as soon as the loop begins running.
event_loop = asyncio.new_event_loop()
event_loop.create_task(run_web_server())
event_loop.run_forever()