diff options
| author | Daniel Smith <daniel.smith@qt.io> | 2024-07-26 10:48:16 +0200 |
|---|---|---|
| committer | Daniel Smith <daniel.smith@qt.io> | 2024-07-26 10:48:16 +0200 |
| commit | f7c6fc29ca6484712086fecb3dc54dddea86f017 (patch) | |
| tree | 6ef558cc06ea24518746fd351a949e2f3839b873 /main.py | |
Initial commit for QtFlake8Bot — Task-number: QTQAINFRA-6468
Diffstat (limited to 'main.py')
| -rw-r--r-- | main.py | 344 |
1 file changed, 344 insertions, 0 deletions
# Copyright (C) 2023 The Qt Company Ltd.
# Contact: https://www.qt.io/licensing/
#
# You may use this file under the terms of the 3-clause BSD license.
# See the file LICENSE in qt/qtrepotools for details.
#

""" This script listens for incoming webhook requests of patchset-created type
    from Gerrit, checks out the patch locally, and runs flake8 on it.
    It then posts a comment for each issue identified to Gerrit with the results.
"""

import json
import os
import sys
import asyncio
import base64
import fnmatch
import atexit
from urllib.parse import urlparse
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
import logging
from systemd.journal import JournalHandler
import tempfile

from aiohttp import web

log = logging.getLogger('flake8_bot')
log.addHandler(JournalHandler())
log.setLevel(logging.INFO)


GERRIT_USERNAME = os.environ.get('GERRIT_USERNAME')
GERRIT_PASSWORD = os.environ.get('GERRIT_PASSWORD')

if not GERRIT_USERNAME or not GERRIT_PASSWORD:
    # Credentials are mandatory: every Gerrit REST call below authenticates
    # with HTTP Basic auth. Use error level for a fatal condition.
    log.error('Please set the GERRIT_USERNAME and GERRIT_PASSWORD environment variables.')
    sys.exit(1)

# Base64 encode the username and password for the HTTP Basic "Authorization" header.
GERRIT_AUTH = base64.b64encode(
    (GERRIT_USERNAME + ':' + GERRIT_PASSWORD).encode('utf-8')).decode('utf-8')


class Lock:
    """ Set up a semaphore-like lock to prevent acting on the git repo while it is in use.

    This is necessary because the Gerrit webhook is asynchronous, so multiple
    requests may come in at once, and every request mutates the same working
    tree (clone/fetch/checkout plus os.chdir). A simple polled flag is
    sufficient because all access happens on a single event-loop thread, so
    the check-and-set in acquire() cannot be interrupted.
    """

    def __init__(self):
        self.locked = False

    async def acquire(self):
        """ Wait until the lock is free, then take it. """
        while True:
            if self.locked:
                # Another request is using the repo; yield to the event loop.
                await asyncio.sleep(1)
                continue
            self.locked = True
            break  # If the lock is acquired successfully, break the loop

    def release(self):
        """ Release the lock. """
        self.locked = False


semaphore = Lock()


async def clone_repo(data):
    """ Clone the target repo (if not already present) and check out the branch. """
    log.info("Cloning repo %s", data['change']['project'])
    if os.path.exists(data['repo_name']):
        # The repo already exists from a previous request; reuse it.
        return
    repo_url = "https://codereview.qt-project.org/" + data['change']['project'] + ".git"
    # Clone the repo
    p = await asyncio.create_subprocess_exec('git', 'clone', repo_url, data['repo_name'])
    await p.communicate()
    os.chdir(data['repo_name'])
    try:
        # Check out the branch
        p = await asyncio.create_subprocess_exec('git', 'checkout', data['change']['branch'])
        await p.communicate()
    finally:
        # Always restore the working directory, even if git fails, so later
        # requests do not start from inside the repo.
        os.chdir('..')


async def checkout_patch(data):
    """ Fetch the patchset's ref from Gerrit and check it out as FETCH_HEAD. """

    log.info("%s: Checking out patch", data['change']['number'])
    # Check out the patch
    os.chdir(data['repo_name'])
    try:
        # git clean -fdx first to remove any untracked files
        p = await asyncio.create_subprocess_exec('git', 'clean', '-fdx')
        await p.communicate()
        # git fetch origin <ref>
        p = await asyncio.create_subprocess_exec('git', 'fetch', 'origin',
                                                 data['patchSet']['ref'])
        await p.communicate()
        p = await asyncio.create_subprocess_exec('git', 'checkout', 'FETCH_HEAD')
        await p.communicate()
    finally:
        # Restore the working directory even if a git step fails.
        os.chdir('..')


async def run_flake8(data):
    """ Run flake8 on the python files changed by the patch.

    Returns a tuple (comments_per_file, checked_file_count) where
    comments_per_file maps changed file names to lists of
    {'line': ..., 'message': ...} dicts. Returns (None, 0) when the
    patch changed no python files worth checking.
    """

    log.info("%s: Running flake8", data['change']['number'])
    comments_per_file = {}
    os.chdir(data['repo_name'])
    fallback_config = False
    flake8_config = None
    try:
        # Get the list of files changed in this patch
        p = await asyncio.create_subprocess_exec('git', 'diff-tree', '--no-commit-id',
                                                 '--name-status', '-r', 'FETCH_HEAD',
                                                 stdout=asyncio.subprocess.PIPE)
        stdout, stderr = await p.communicate()
        # Parse the output: each line is "<status>\t<path>".
        changed_files = []  # Empty list to store file names
        for line in stdout.decode().split('\n'):
            if len(line) > 0 and not line.startswith("D"):  # Skip empty lines and deleted files
                changed_files.append(line.split('\t')[1])  # Add the file name to the list

        # Generated files which should never be linted.
        ignore_patterns = ["rc_*.py", "*_rc.py", "ui_*.py"]

        # Use the .flake8 file in the repo
        flake8_config = os.path.join(os.getcwd(), '.flake8')
        log.info("Trying flake8 config: %s", flake8_config)
        if not os.path.exists(flake8_config):
            fallback_config = True
            log.warning("No .flake8 file found. Using default config.")
            # Write a default config to a temp file; it is removed in the
            # finally block below.
            with tempfile.NamedTemporaryFile('w', encoding='utf-8', delete=False) as f:
                f.write(
                    """[flake8]
ignore = E115,E265,W503
max-line-length = 100
exclude = rc_*.py,*_rc.py,ui_*.py
per-file-ignores =
    # for init_test_paths() hack
    *_test_*.py:E402
    __init__.py:F401,E402
"""
                )
                flake8_config = f.name
        checked_file_count = 0
        # Run flake8 on each file
        for file in changed_files:
            if not file.endswith('.py'):
                continue  # Don't call flake8 on non-python files
            if any(fnmatch.fnmatch(file, pattern) for pattern in ignore_patterns):
                continue  # Skip this file if it matches any of the other ignore patterns

            checked_file_count += 1

            # Run flake8 on the diff. The custom format uses ';;' as a
            # separator so the output can be split unambiguously below.
            process = await asyncio.create_subprocess_exec(
                'flake8',
                '--config=' + flake8_config,
                '--format=%(path)s;;%(row)d;;%(code)s;;%(text)s',
                file,
                stdout=asyncio.subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            # Parse the output
            output = stdout.decode()
            log.debug("Flake8 output: %s", output)
            # Split the output into lines and parse each one
            for line in output.split('\n'):
                # Split the line into its components
                components = line.split(';;')
                if len(components) == 4:
                    # Parse the line components
                    file_name = components[0]
                    line_number = components[1]
                    error_code = components[2]
                    error_message = components[3]
                    # Add the comment to the list of comments for this file
                    if file_name not in comments_per_file and file_name in changed_files:
                        comments_per_file[file_name] = []
                    comments_per_file[file_name].append(
                        {'line': line_number, 'message': error_code + ': ' + error_message})
    finally:
        # Clean up the temporary fallback config and restore the working
        # directory even if git or flake8 raised.
        if fallback_config and flake8_config:
            os.remove(flake8_config)
        os.chdir('..')
    if checked_file_count == 0:
        log.info("%s: No python files changed.",
                 data['change']['number'])
        return None, 0
    log.info("%s: Comments: %s", data['change']['number'], json.dumps(comments_per_file))
    return comments_per_file, checked_file_count


def generate_review(comments_per_file, change_number):
    """ Generate a Gerrit ReviewInput dict from the flake8 comments.

    Comments already ported from an earlier patchset are filtered out so
    the bot does not post the same finding twice.
    """

    log.info("Generating review")
    if not comments_per_file:
        # Clean run: approve the sanity review.
        review = {
            'message': "No flake8 issues found. Looks good.",
            'labels': {'Sanity-Review': 1},
            'tag': "autogenerated:flake8"
        }
        return review
    ported_comments = fetch_ported_comments(change_number)
    comment_inputs = {}
    for file_name, comments in comments_per_file.items():
        for comment in comments:
            skip_duplicate = False
            # Check if this comment is a ported comment
            if ported_comments and ported_comments.get(file_name):
                for ported_comment in ported_comments[file_name]:
                    if ported_comment['message'] == comment['message'] and \
                            (ported_comment['line'] == comment.get('line')
                             or not comment.get('line')):
                        skip_duplicate = True
                        log.debug("Skipping duplicate comment: %s", comment['message'])
                        break
            if skip_duplicate:
                continue
            if file_name not in comment_inputs:
                comment_inputs[file_name] = []
            comment_inputs[file_name].append({
                'line': comment['line'],
                'message': comment['message'],
                'unresolved': 'true'
            })
    message = "Flake8 identified issues in this change."
    if not comment_inputs:
        # Every finding was already posted on a previous patchset.
        message = "Flake8 identified issues which remain unresolved from a previous patchset." \
                  " Please address those issues."
    review = {
        'message': message,
        'comments': comment_inputs,
        'labels': {'Code-Review': -1}
    }
    return review


def fetch_ported_comments(changeId):
    """ Fetch the ported comments from the Gerrit API.

    Returns a map of CommentInfo objects keyed by file name, or None when
    the request fails.
    """

    log.info("%s: Fetching ported comments", changeId)
    url = f"https://codereview.qt-project.org/a/changes/{changeId}/revisions/current/ported_comments"
    headers = {'Content-Type': 'application/json;charset=UTF-8',
               'Authorization': 'Basic ' + GERRIT_AUTH}
    req = Request(url, headers=headers)
    try:
        response = urlopen(req)
    except HTTPError as e:
        log.info('Error fetching ported comments: %s %s', str(e.code), e.reason)
    except URLError as e:
        log.info('Error fetching ported comments: %s', str(e.reason))
    else:
        # Gerrit prefixes JSON responses with ")]}'" as an XSSI guard; strip it.
        data = json.loads(response.read().decode('utf-8').replace(")]}'", ''))
        log.info("%s: Ported comments: %s", changeId, json.dumps(data))
        return data
    return None


async def post_review(data, review, retry=0):
    """ Post the review to Gerrit.

    Retries up to 10 times on HTTP 409 (Gerrit lock failure); other HTTP
    errors are logged and the review is dropped.
    """

    log.info("%s: Posting review", data['change']['number'])
    change_number = data['change']['number']
    revision = data['patchSet']['revision']
    url = f"https://codereview.qt-project.org/a/changes/{change_number}/revisions/{revision}/review"
    review_data = json.dumps(review).encode('utf-8')
    headers = {'Content-Type': 'application/json;charset=UTF-8',
               'Authorization': 'Basic ' + GERRIT_AUTH}
    req = Request(url, review_data, headers)
    log.info('%s: Review data: %s', change_number, review_data)
    try:
        response = urlopen(req)
    except HTTPError as e:
        if e.code == 409:
            # Lock failure. Try again for up to 10 times recursively.
            if retry < 10:
                log.info('%s: Retrying due to 409 Lock Failure...', change_number)
                await asyncio.sleep(5)
                await post_review(data, review, retry + 1)
        else:
            log.info('Error posting review: %s %s', str(e.code), e.reason)
    except URLError as e:
        log.info('Error posting review: %s', str(e.reason))
    else:
        log.info('%s: Review posted successfully.', change_number)


async def handle(request):
    """ Handle an incoming Gerrit webhook request. """
    body = await request.text()
    data = json.loads(body)

    # make sure it's a patchset-created event. This must be checked first:
    # other Gerrit event types may not carry a 'change' object at all.
    if data.get('type') != 'patchset-created':
        return web.Response(status=200)

    # Make sure the change is in state NEW
    if data['change']['status'] != 'NEW':
        return web.Response(status=200)

    # Only act on pyside repos.
    if not data['change']['project'].startswith('pyside'):
        return web.Response(status=200)

    # Use the last path component of the project as the local checkout dir.
    data['repo_name'] = urlparse(data['change']['project']).path.split('/')[-1]

    log.info("%s: Received webhook for %s", data['change']['number'], data['patchSet']['revision'])

    # Request a lock on the git repo
    try:
        log.info("%s: Acquiring lock", data['change']['number'])
        await semaphore.acquire()
        await clone_repo(data)
        await checkout_patch(data)
        issues, file_count = await run_flake8(data)
    except Exception as e:
        # Always answer 200 so Gerrit does not retry the webhook.
        log.error("Error: %s", str(e))
        return web.Response(status=200)
    finally:
        log.info("%s: Releasing lock", data['change']['number'])
        semaphore.release()

    # create a review with the comments if any python files were reviewed
    if file_count > 0:
        review = generate_review(issues, data['change']['number'])
        await post_review(data, review)

    return web.Response(status=200)


async def run_web_server():
    """ Run the web server. """
    app = web.Application()
    app.add_routes([web.post('/', handle)])
    runner = web.AppRunner(app)
    await runner.setup()
    # PORT arrives from the environment as a string; the site needs an int.
    port = int(os.environ.get("PORT") or 8088)
    site = web.TCPSite(runner, 'localhost', port)
    await site.start()
    log.info("Web server started on port %s", port)


loop = asyncio.new_event_loop()
loop.create_task(run_web_server())
loop.run_forever()
