aboutsummaryrefslogtreecommitdiffstats
path: root/main.py
diff options
context:
space:
mode:
authorDaniel Smith <daniel.smith@qt.io>2024-07-26 10:48:16 +0200
committerDaniel Smith <daniel.smith@qt.io>2024-07-26 10:48:16 +0200
commitf7c6fc29ca6484712086fecb3dc54dddea86f017 (patch)
tree6ef558cc06ea24518746fd351a949e2f3839b873 /main.py
Initial commit for QtFlake8Bot\n\nTask-number: QTQAINFRA-6468
Diffstat (limited to 'main.py')
-rw-r--r--main.py344
1 files changed, 344 insertions, 0 deletions
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..0f71243
--- /dev/null
+++ b/main.py
@@ -0,0 +1,344 @@
+# Copyright (C) 2023 The Qt Company Ltd.
+# Contact: https://www.qt.io/licensing/
+#
+# You may use this file under the terms of the 3-clause BSD license.
+# See the file LICENSE in qt/qtrepotools for details.
+#
+
""" This script listens for incoming webhook requests of patchset-created type
    from Gerrit, checks out the patch locally, and runs flake8 on it.
    It then posts a comment for each issue identified to Gerrit with the results.
"""
+
+import json
+import os
+import sys
+import asyncio
+import base64
+import fnmatch
+import atexit
+from urllib.parse import urlparse
+from urllib.request import urlopen, Request
+from urllib.error import HTTPError, URLError
+import logging
+from systemd.journal import JournalHandler
+import tempfile
+
+from aiohttp import web
+
# Log to the systemd journal so the bot's output shows up in journalctl.
log = logging.getLogger('flake8_bot')
log.addHandler(JournalHandler())
log.setLevel(logging.INFO)


# Gerrit HTTP credentials are supplied via the environment; the bot cannot
# post reviews without them, so bail out early when they are missing.
GERRIT_USERNAME = os.environ.get('GERRIT_USERNAME')
GERRIT_PASSWORD = os.environ.get('GERRIT_PASSWORD')

if not GERRIT_USERNAME or not GERRIT_PASSWORD:
    # Missing credentials is a fatal misconfiguration, not an informational
    # event, so log it at error level before exiting.
    log.error('Please set the GERRIT_USERNAME and GERRIT_PASSWORD environment variables.')
    sys.exit(1)

# Pre-compute the HTTP Basic auth token: base64("username:password").
GERRIT_AUTH = base64.b64encode(
    f'{GERRIT_USERNAME}:{GERRIT_PASSWORD}'.encode('utf-8')).decode('utf-8')
+
class Lock:
    """ Mutual-exclusion lock guarding the local git repo.

    The Gerrit webhook handler is asynchronous, so several requests may be
    in flight at once; this serializes their access to the checkout.
    Internally wraps asyncio.Lock instead of the previous flag-plus-sleep
    busy wait, which polled only once per second and so added up to a full
    second of latency to every queued request.
    """
    def __init__(self):
        self._lock = asyncio.Lock()

    @property
    def locked(self):
        """ True while the lock is held (kept for API compatibility). """
        return self._lock.locked()

    async def acquire(self):
        """ Wait until the lock is free, then take it. """
        await self._lock.acquire()

    def release(self):
        """ Release the lock. A no-op if it is not currently held, matching
        the previous implementation's idempotent release. """
        if self._lock.locked():
            self._lock.release()

semaphore = Lock()
+
async def clone_repo(data):
    """ Clone the change's repository and check out its target branch.

    Args:
        data: the webhook payload, augmented with 'repo_name' (the local
              clone directory) by the request handler.

    Raises:
        RuntimeError: if the git clone fails.
    """
    log.info("Cloning repo %s", data['change']['project'])
    if os.path.exists(data['repo_name']):
        # The repo was cloned by an earlier request; reuse it as-is.
        return
    repo_url = "https://codereview.qt-project.org/" + data['change']['project'] + ".git"
    proc = await asyncio.create_subprocess_exec('git', 'clone', repo_url, data['repo_name'])
    await proc.communicate()
    if proc.returncode != 0:
        # Fail loudly here instead of letting the chdir below raise a
        # confusing FileNotFoundError when the clone directory is missing.
        raise RuntimeError(
            f"git clone of {repo_url} failed with exit code {proc.returncode}")
    os.chdir(data['repo_name'])
    try:
        proc = await asyncio.create_subprocess_exec('git', 'checkout',
                                                    data['change']['branch'])
        await proc.communicate()
    finally:
        # Always restore the working directory so later requests start from
        # a known location even if the checkout raises.
        os.chdir('..')
+
+
async def checkout_patch(data):
    """ Check out the patchset under review inside the local clone. """

    log.info("%s: Checking out patch", data['change']['number'])
    os.chdir(data['repo_name'])
    # Wipe untracked files, fetch the patchset ref, then move to it.
    commands = (
        ('git', 'clean', '-fdx'),
        ('git', 'fetch', 'origin', data['patchSet']['ref']),
        ('git', 'checkout', 'FETCH_HEAD'),
    )
    for argv in commands:
        proc = await asyncio.create_subprocess_exec(*argv)
        await proc.communicate()
    os.chdir('..')
+
+
async def _list_changed_files():
    """ Return the names of files added or modified by FETCH_HEAD.
    Deleted files are skipped since there is nothing left to lint. """
    p = await asyncio.create_subprocess_exec('git', 'diff-tree', '--no-commit-id',
                                             '--name-status', '-r', 'FETCH_HEAD',
                                             stdout=asyncio.subprocess.PIPE)
    stdout, _ = await p.communicate()
    changed_files = []
    for line in stdout.decode().split('\n'):
        # Each line is "<status>\t<path>"; skip empty lines and deletions.
        if len(line) > 0 and not line.startswith("D"):
            changed_files.append(line.split('\t')[1])
    return changed_files


def _write_default_config():
    """ Write the built-in fallback flake8 config to a temporary file and
    return its path. The caller is responsible for removing the file. """
    with tempfile.NamedTemporaryFile('w', encoding='utf-8', suffix='.flake8',
                                     delete=False) as f:
        f.write(
            """[flake8]
ignore = E115,E265,W503
max-line-length = 100
exclude = rc_*.py,*_rc.py,ui_*.py
per-file-ignores =
    # for init_test_paths() hack
    *_test_*.py:E402
    __init__.py:F401,E402
"""
        )
        return f.name


async def run_flake8(data):
    """ Run flake8 on every Python file touched by the patch.

    Returns:
        (comments_per_file, checked_file_count) where comments_per_file maps
        file names to lists of {'line': int, 'message': str} dicts, or
        (None, 0) when no Python files were changed.
    """

    log.info("%s: Running flake8", data['change']['number'])
    comments_per_file = {}
    os.chdir(data['repo_name'])
    fallback_config = False
    # Prefer the .flake8 file shipped in the repo itself.
    flake8_config = os.path.join(os.getcwd(), '.flake8')
    try:
        changed_files = await _list_changed_files()

        # Generated files that should never be linted.
        ignore_patterns = ["rc_*.py", "*_rc.py", "ui_*.py"]

        log.info("Trying flake8 config: %s", flake8_config)
        if not os.path.exists(flake8_config):
            fallback_config = True
            log.warning("No .flake8 file found. Using default config.")
            flake8_config = _write_default_config()

        checked_file_count = 0
        for file in changed_files:
            if not file.endswith('.py'):
                continue  # Don't call flake8 on non-python files
            if any(fnmatch.fnmatch(file, pattern) for pattern in ignore_patterns):
                continue  # Skip this file if it matches any of the other ignore patterns

            checked_file_count += 1

            # ";;" is used as a field separator because flake8 messages may
            # themselves contain colons.
            process = await asyncio.create_subprocess_exec(
                'flake8',
                '--config=' + flake8_config,
                '--format=%(path)s;;%(row)d;;%(code)s;;%(text)s',
                file,
                stdout=asyncio.subprocess.PIPE
            )
            stdout, _ = await process.communicate()
            output = stdout.decode()
            log.debug("Flake8 output: %s", output)
            for line in output.split('\n'):
                components = line.split(';;')
                if len(components) != 4:
                    continue
                file_name, line_number, error_code, error_message = components
                # setdefault avoids the KeyError the previous guard could hit
                # when flake8 reported a path missing from changed_files (the
                # old code only created the key for known files but appended
                # unconditionally).
                comments_per_file.setdefault(file_name, []).append(
                    # Gerrit's CommentInput expects the line as an integer.
                    {'line': int(line_number),
                     'message': error_code + ': ' + error_message})
    finally:
        # Clean up the temporary config and restore the working directory
        # even when git or flake8 raises.
        if fallback_config and os.path.exists(flake8_config):
            os.remove(flake8_config)
        os.chdir('..')

    if checked_file_count == 0:
        log.info("%s: No python files changed.",
                 data['change']['number'])
        return None, 0
    log.info("%s: Comments: %s", data['change']['number'], json.dumps(comments_per_file))
    return comments_per_file, checked_file_count
+
+
def generate_review(comments_per_file, change_number):
    """ Build a Gerrit ReviewInput dict from the flake8 findings.

    With no findings the review approves with Sanity-Review +1; otherwise it
    votes Code-Review -1 and attaches inline comments, dropping any comment
    that was already ported over from a previous patchset.
    """

    log.info("Generating review")
    if not comments_per_file:
        return {
            'message': "No flake8 issues found. Looks good.",
            'labels': {'Sanity-Review': 1},
            'tag': "autogenerated:flake8"
        }

    ported_comments = fetch_ported_comments(change_number)

    def _already_ported(path, comment):
        """ True when an identical comment exists among the ported ones. """
        for prior in (ported_comments or {}).get(path) or []:
            if prior['message'] == comment['message'] and \
                    (prior['line'] == comment.get('line') or not comment.get('line')):
                log.debug("Skipping duplicate comment: %s", comment['message'])
                return True
        return False

    comment_inputs = {}
    for path, comments in comments_per_file.items():
        for comment in comments:
            if _already_ported(path, comment):
                continue
            comment_inputs.setdefault(path, []).append({
                'line': comment['line'],
                'message': comment['message'],
                'unresolved': 'true'
            })

    if comment_inputs:
        message = "Flake8 identified issues in this change."
    else:
        # Every new finding duplicated a ported comment: nag, don't repeat.
        message = "Flake8 identified issues which remain unresolved from a previous patchset." \
            " Please address those issues."
    return {
        'message': message,
        'comments': comment_inputs,
        'labels': {'Code-Review': -1}
    }
+
def fetch_ported_comments(changeId):
    """ Fetch comments ported from earlier patchsets via the Gerrit REST API.

    Args:
        changeId: the numeric Gerrit change number.

    Returns:
        A dict mapping file names to lists of CommentInfo dicts, or None
        when the request fails.
    """

    log.info("%s: Fetching ported comments", changeId)
    url = f"https://codereview.qt-project.org/a/changes/{changeId}/revisions/current/ported_comments"
    headers = {'Content-Type': 'application/json;charset=UTF-8',
               'Authorization': 'Basic ' + GERRIT_AUTH}
    req = Request(url, headers=headers)
    try:
        # Close the connection deterministically instead of leaking it.
        with urlopen(req) as response:
            body = response.read().decode('utf-8')
    except HTTPError as e:
        log.info('Error fetching ported comments: %s %s', str(e.code), e.reason)
    except URLError as e:
        log.info('Error fetching ported comments: %s', str(e.reason))
    else:
        # Gerrit prefixes JSON responses with ")]}'" against XSSI; strip it.
        data = json.loads(body.replace(")]}'", ''))
        log.info("%s: Ported comments: %s", changeId, json.dumps(data))
        return data
    return None
+
async def post_review(data, review, retry=0):
    """ Post the review to Gerrit, retrying on 409 lock failures.

    Args:
        data: the webhook payload (supplies change number and revision).
        review: the ReviewInput dict to send.
        retry: current attempt count; gives up after 10 retries.
    """

    change_number = data['change']['number']
    log.info("%s: Posting review", change_number)
    revision = data['patchSet']['revision']
    url = f"https://codereview.qt-project.org/a/changes/{change_number}/revisions/{revision}/review"
    review_data = json.dumps(review).encode('utf-8')
    headers = {'Content-Type': 'application/json;charset=UTF-8',
               'Authorization': 'Basic ' + GERRIT_AUTH}
    req = Request(url, review_data, headers)
    log.info('%s: Review data: %s', change_number, review_data)
    try:
        # Use a context manager so the connection is always closed.
        with urlopen(req):
            pass
    except HTTPError as e:
        # The old code had this log line commented out, silently swallowing
        # every non-409 HTTP error; surface them again.
        log.info('Error posting review: %s %s', str(e.code), e.reason)
        if e.code == 409 and retry < 10:
            # Gerrit lock failure. Try again for up to 10 times recursively.
            log.info('%s: Retrying due to 409 Lock Failure...', change_number)
            await asyncio.sleep(5)
            await post_review(data, review, retry + 1)
    except URLError as e:
        log.info('Error posting review: %s', str(e.reason))
    else:
        log.info('%s: Review posted successfully.', change_number)
+
+
async def handle(request):
    """ Handle an incoming Gerrit webhook request.

    Filters for patchset-created events on open changes in pyside repos,
    then runs flake8 under the repo lock and posts the result as a review.
    Always answers 200 so Gerrit does not keep re-delivering the webhook.
    """
    body = await request.text()
    data = json.loads(body)

    # Check the event type first: non-change events (e.g. ref-updated) carry
    # no 'change' object, and reading it before this check raised KeyError.
    if data.get('type') != 'patchset-created':
        return web.Response(status=200)

    # Make sure the change is in state NEW
    if data['change']['status'] != 'NEW':
        return web.Response(status=200)

    # Only act on pyside repos.
    if not data['change']['project'].startswith('pyside'):
        return web.Response(status=200)

    # The last path component of the project name is the local clone dir.
    data['repo_name'] = urlparse(data['change']['project']).path.split('/')[-1]

    log.info("%s: Received webhook for %s", data['change']['number'], data['patchSet']['revision'])

    # Request a lock on the git repo
    try:
        log.info("%s: Acquiring lock", data['change']['number'])
        await semaphore.acquire()
        await clone_repo(data)
        await checkout_patch(data)
        issues, file_count = await run_flake8(data)
    except Exception as e:
        # Still answer 200: a failure here is ours, not Gerrit's.
        log.error("Error: %s", str(e))
        return web.Response(status=200)
    finally:
        log.info("%s: Releasing lock", data['change']['number'])
        semaphore.release()

    # create a review with the comments if any python files were reviewed
    if file_count > 0:
        review = generate_review(issues, data['change']['number'])
        await post_review(data, review)

    return web.Response(status=200)
+
+
async def run_web_server():
    """ Start the aiohttp server listening for Gerrit webhooks. """
    app = web.Application()
    app.add_routes([web.post('/', handle)])
    runner = web.AppRunner(app)
    await runner.setup()
    # PORT arrives from the environment as a string; TCPSite expects an
    # integer port number, so convert (falling back to 8088).
    port = int(os.environ.get("PORT") or 8088)
    site = web.TCPSite(runner, 'localhost', port)
    await site.start()
    log.info("Web server started on port %s", port)
+
# Run the webhook server on a dedicated event loop, forever; the task is
# scheduled now and starts as soon as the loop begins running.
event_loop = asyncio.new_event_loop()
event_loop.create_task(run_web_server())
event_loop.run_forever()