#!/usr/bin/env python
# The service provided by this script is not critical, but it shares a GitHub
# API request quota with critical services. For this reason, all requests to
# the GitHub API are preceded by a "guard" which verifies that the subsequent
# request will not deplete the shared quota.
#
# In effect, this script will fail rather than interfere with the operation of
# critical services.
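#
# Example invocations (the script name, host, and argument values below are
# illustrative, not prescriptive):
#
#   python pr_preview.py --host https://api.github.com \
#       --github-project web-platform-tests/wpt synchronize --window 600
#   python pr_preview.py --host https://api.github.com \
#       --github-project web-platform-tests/wpt detect \
#       --target https://wpt-preview.example.com --timeout 300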
import argparse
import json
import logging
import os
import subprocess
import time
import requests
# The ratio of "requests remaining" to "total request quota" below which this
# script should refuse to interact with the GitHub.com API
API_RATE_LIMIT_THRESHOLD = 0.2
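# (For example, with a nominal quota of 5,000 "core" requests per hour, the
# guard below would refuse to proceed once fewer than 1,000 requests remain;
# these figures are illustrative.)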
# The GitHub Pull Request label which indicates that a Pull Request is expected
# to be actively mirrored by the preview server
LABEL = 'safe for preview'
# The number of seconds to wait between attempts to verify that a submission
# preview is available on the Pull Request preview server
POLLING_PERIOD = 5
# Pull Requests from authors with the following associations to the project
# should automatically receive previews
#
# https://developer.github.com/v4/enum/commentauthorassociation/ (equivalent
# documentation for the REST API was not available at the time of writing)
TRUSTED_AUTHOR_ASSOCIATIONS = ('COLLABORATOR', 'MEMBER', 'OWNER')
# These GitHub accounts are not associated with individuals, and the Pull
# Requests they submit rarely require a preview.
AUTOMATION_GITHUB_USERS = (
'autofoolip', 'chromium-wpt-export-bot', 'moz-wptsync-bot',
'servo-wpt-sync'
)
DEPLOYMENT_PREFIX = 'wpt-preview-'
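# e.g. a hypothetical Pull Request numbered 12345 maps to the Deployment
# environment "wpt-preview-12345"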
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def gh_request(method_name, url, body=None, media_type=None):
github_token = os.environ['DEPLOY_TOKEN']
kwargs = {
'headers': {
'Authorization': 'token {}'.format(github_token),
'Accept': media_type or 'application/vnd.github.v3+json'
}
}
method = getattr(requests, method_name.lower())
if body is not None:
kwargs['json'] = body
logger.info('Issuing request: %s %s', method_name.upper(), url)
resp = method(url, **kwargs)
logger.info('Response status code: %s', resp.status_code)
resp.raise_for_status()
return resp.json()
def guard(resource):
'''Decorate a `Project` instance method which interacts with the GitHub
API, ensuring that the subsequent request will not deplete the relevant
allowance. This verification does not itself influence rate limiting:
> Accessing this endpoint does not count against your REST API rate limit.
https://developer.github.com/v3/rate_limit/
'''
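    # The response consumed below is expected to resemble the following
    # (abridged, with illustrative values):
    #
    #   {"resources": {"core":   {"limit": 5000, "remaining": 4800},
    #                  "search": {"limit": 30,   "remaining": 28}}}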
def guard_decorator(func):
def wrapped(self, *args, **kwargs):
limits = gh_request('GET', '{}/rate_limit'.format(self._host))
values = limits['resources'].get(resource)
remaining = values['remaining']
limit = values['limit']
logger.info(
'Limit for "%s" resource: %s/%s', resource, remaining, limit
)
if limit and float(remaining) / limit < API_RATE_LIMIT_THRESHOLD:
raise Exception(
'Exiting to avoid GitHub.com API request throttling.'
)
return func(self, *args, **kwargs)
return wrapped
return guard_decorator
class Project(object):
def __init__(self, host, github_project):
self._host = host
self._github_project = github_project
@guard('search')
def get_pull_requests(self, updated_since):
window_start = time.strftime('%Y-%m-%dT%H:%M:%SZ', updated_since)
url = '{}/search/issues?q=repo:{}+is:pr+updated:>{}'.format(
self._host, self._github_project, window_start
)
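        # The resulting query resembles the following (illustrative values):
        #
        #   {host}/search/issues?q=repo:web-platform-tests/wpt+is:pr+updated:>2020-01-01T00:00:00Z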
logger.info(
'Searching for Pull Requests updated since %s', window_start
)
data = gh_request('GET', url)
logger.info('Found %d Pull Requests', len(data['items']))
if data['incomplete_results']:
raise Exception('Incomplete results')
return data['items']
@guard('core')
def pull_request_is_from_fork(self, pull_request):
pr_number = pull_request['number']
url = '{}/repos/{}/pulls/{}'.format(
self._host, self._github_project, pr_number
)
logger.info('Checking if pull request %s is from a fork', pr_number)
data = gh_request('GET', url)
repo_name = data['head']['repo']['full_name']
is_fork = repo_name != self._github_project
logger.info(
'Pull request %s is from \'%s\'. Is a fork: %s',
pr_number, repo_name, is_fork
)
return is_fork
@guard('core')
def create_ref(self, refspec, revision):
url = '{}/repos/{}/git/refs'.format(self._host, self._github_project)
logger.info('Creating ref "%s" (%s)', refspec, revision)
gh_request('POST', url, {
'ref': 'refs/{}'.format(refspec),
'sha': revision
})
@guard('core')
def update_ref(self, refspec, revision):
url = '{}/repos/{}/git/refs/{}'.format(
self._host, self._github_project, refspec
)
logger.info('Updating ref "%s" (%s)', refspec, revision)
gh_request('PATCH', url, {'sha': revision})
@guard('core')
def create_deployment(self, pull_request, revision):
url = '{}/repos/{}/deployments'.format(
self._host, self._github_project
)
# The Pull Request preview system only exposes one Deployment for a
# given Pull Request. Identifying the Deployment by the Pull Request
# number ensures that GitHub.com automatically responds to new
# Deployments by designating prior Deployments as "inactive"
environment = DEPLOYMENT_PREFIX + str(pull_request['number'])
logger.info('Creating Deployment "%s" for "%s"', environment, revision)
return gh_request('POST', url, {
'ref': revision,
'environment': environment,
'auto_merge': False,
# Pull Request previews are created regardless of GitHub Commit
# Status Checks, so Status Checks should be ignored when creating
# GitHub Deployments.
'required_contexts': []
}, 'application/vnd.github.ant-man-preview+json')
@guard('core')
def get_deployment(self, revision):
url = '{}/repos/{}/deployments?sha={}'.format(
self._host, self._github_project, revision
)
deployments = gh_request('GET', url)
return deployments.pop() if len(deployments) else None
@guard('core')
def update_deployment(self, target, deployment, state, description=''):
if state in ('pending', 'success'):
pr_number = deployment['environment'][len(DEPLOYMENT_PREFIX):]
environment_url = '{}/{}'.format(target, pr_number)
else:
environment_url = None
url = '{}/repos/{}/deployments/{}/statuses'.format(
self._host, self._github_project, deployment['id']
)
        return gh_request('POST', url, {
'state': state,
'description': description,
'environment_url': environment_url
}, 'application/vnd.github.ant-man-preview+json')
class Remote(object):
def __init__(self, github_project):
# The repository in the GitHub Actions environment is configured with
# a remote whose URL uses unauthenticated HTTPS, making it unsuitable
# for pushing changes.
self._token = os.environ['DEPLOY_TOKEN']
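    # The git commands below authenticate over HTTPS by supplying the token as
    # the username (via `credential.username`) and setting `core.askPass=true`
    # so that git's password prompt is answered by the `true` command, i.e.
    # with an empty password. This relies on GitHub.com accepting access
    # tokens presented as the username (an assumption about GitHub's token
    # authentication behavior at the time of writing).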
def get_revision(self, refspec):
output = subprocess.check_output([
'git',
'-c',
'credential.username={}'.format(self._token),
'-c',
'core.askPass=true',
'ls-remote',
'origin',
'refs/{}'.format(refspec)
])
if not output:
return None
return output.decode('utf-8').split()[0]
def delete_ref(self, refspec):
full_ref = 'refs/{}'.format(refspec)
logger.info('Deleting ref "%s"', refspec)
subprocess.check_call([
'git',
'-c',
'credential.username={}'.format(self._token),
'-c',
'core.askPass=true',
'push',
'origin',
'--delete',
full_ref
])
def is_open(pull_request):
return not pull_request['closed_at']
def has_mirroring_label(pull_request):
for label in pull_request['labels']:
if label['name'] == LABEL:
return True
return False
def should_be_mirrored(project, pull_request):
return (
is_open(pull_request) and (
has_mirroring_label(pull_request) or (
pull_request['user']['login'] not in AUTOMATION_GITHUB_USERS and
pull_request['author_association'] in TRUSTED_AUTHOR_ASSOCIATIONS
)
) and
# Query this last as it requires another API call to verify
not project.pull_request_is_from_fork(pull_request)
)
def is_deployed(host, deployment):
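    # The preview server is expected to publish the revision it is currently
    # serving for each worktree at a well-known path, e.g. (hypothetical host
    # and Pull Request number):
    #
    #   https://wpt-preview.example.com/.git/worktrees/12345/HEAD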
worktree_name = deployment['environment'][len(DEPLOYMENT_PREFIX):]
response = requests.get(
'{}/.git/worktrees/{}/HEAD'.format(host, worktree_name)
)
if response.status_code != 200:
return False
return response.text.strip() == deployment['sha']
def synchronize(host, github_project, window):
    '''Inspect all Pull Requests which have been modified in a given window of
    time. Create, update, or delete the relevant git refs and GitHub
    Deployments according to the status of each Pull Request.'''
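    # For a hypothetical Pull Request numbered 12345, the refs managed below
    # would be "refs/prs-trusted-for-preview/12345" and "refs/prs-open/12345".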
project = Project(host, github_project)
remote = Remote(github_project)
pull_requests = project.get_pull_requests(
time.gmtime(time.time() - window)
)
for pull_request in pull_requests:
logger.info('Processing Pull Request #%(number)d', pull_request)
refspec_trusted = 'prs-trusted-for-preview/{number}'.format(
**pull_request
)
refspec_open = 'prs-open/{number}'.format(**pull_request)
revision_latest = remote.get_revision(
'pull/{number}/head'.format(**pull_request)
)
revision_trusted = remote.get_revision(refspec_trusted)
revision_open = remote.get_revision(refspec_open)
if should_be_mirrored(project, pull_request):
logger.info('Pull Request should be mirrored')
if revision_trusted is None:
project.create_ref(refspec_trusted, revision_latest)
elif revision_trusted != revision_latest:
project.update_ref(refspec_trusted, revision_latest)
if revision_open is None:
project.create_ref(refspec_open, revision_latest)
elif revision_open != revision_latest:
project.update_ref(refspec_open, revision_latest)
if project.get_deployment(revision_latest) is None:
project.create_deployment(
pull_request, revision_latest
)
else:
logger.info('Pull Request should not be mirrored')
if not has_mirroring_label(pull_request) and revision_trusted is not None:
remote.delete_ref(refspec_trusted)
if revision_open is not None and not is_open(pull_request):
remote.delete_ref(refspec_open)
def detect(host, github_project, target, timeout):
'''Manage the status of a GitHub Deployment by polling the Pull Request
preview website until the Deployment is complete or a timeout is
reached.'''
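    # The event payload read below is expected to resemble the following
    # (abridged, with illustrative values):
    #
    #   {"deployment": {"id": 123456, "environment": "wpt-preview-12345",
    #                   "sha": "c0ffee..."}}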
project = Project(host, github_project)
with open(os.environ['GITHUB_EVENT_PATH']) as handle:
data = json.loads(handle.read())
logger.info('Event data: %s', json.dumps(data, indent=2))
deployment = data['deployment']
if not deployment['environment'].startswith(DEPLOYMENT_PREFIX):
logger.info(
'Deployment environment "%s" is unrecognized. Exiting.',
deployment['environment']
)
return
message = 'Waiting up to {} seconds for Deployment {} to be available on {}'.format(
timeout, deployment['environment'], target
)
logger.info(message)
project.update_deployment(target, deployment, 'pending', message)
start = time.time()
while not is_deployed(target, deployment):
if time.time() - start > timeout:
message = 'Deployment did not become available after {} seconds'.format(timeout)
project.update_deployment(target, deployment, 'error', message)
raise Exception(message)
time.sleep(POLLING_PERIOD)
result = project.update_deployment(target, deployment, 'success')
logger.info(json.dumps(result, indent=2))
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='''Synchronize the state of a GitHub.com project with the
        underlying git repository in order to support an externally-hosted
Pull Request preview system. Communicate the state of that system
via GitHub Deployments associated with each Pull Request.'''
)
parser.add_argument(
'--host', required=True, help='the location of the GitHub API server'
)
parser.add_argument(
'--github-project',
required=True,
help='''the GitHub organization and GitHub project name, separated by
a forward slash (e.g. "web-platform-tests/wpt")'''
)
subparsers = parser.add_subparsers(title='subcommands')
parser_sync = subparsers.add_parser(
'synchronize', help=synchronize.__doc__
)
parser_sync.add_argument(
'--window',
type=int,
required=True,
help='''the number of seconds prior to the current moment within which
to search for GitHub Pull Requests. Any Pull Requests updated in
this time frame will be considered for synchronization.'''
)
parser_sync.set_defaults(func=synchronize)
parser_detect = subparsers.add_parser('detect', help=detect.__doc__)
parser_detect.add_argument(
'--target',
required=True,
        help='''the URL of the website at which submission previews are
        expected to become available'''
)
parser_detect.add_argument(
'--timeout',
type=int,
required=True,
help='''the number of seconds to wait for a submission preview to
become available before reporting a GitHub Deployment failure'''
)
parser_detect.set_defaults(func=detect)
values = dict(vars(parser.parse_args()))
values.pop('func')(**values)