blob: 4db40ff0368d3059879f422856fd63f94fa6b895 [file] [log] [blame]
# Copyright (C) 2020 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import bisect
import calendar
import fasteners
import json
import os
import re
import requests
import tempfile
import xmltodict
from datetime import datetime
from webkitcorepy import decorators, string_utils
from webkitscmpy.remote.scm import Scm
from webkitscmpy import Commit, Version
class Svn(Scm):
    """Scm implementation that talks to a Subversion repository over its HTTP (WebDAV) interface."""

    # Shape of a repository URL this class accepts; the named group 'host' is reused to
    # build the default cache directory name.
    # NOTE(review): the '.' after 'svn' is not escaped, so it matches any single character,
    # not only a literal dot -- confirm whether r'svn\.' was intended.
    URL_RE = re.compile(r'\Ahttps?://svn.(?P<host>\S+)/repository/\S+\Z')
    # Matches one '<S:tag>content</S:tag>' or '<D:tag>content</D:tag>' element at the start of
    # a raw (bytes) response line.
    DATA_RE = re.compile(br'<[SD]:(?P<tag>\S+)>(?P<content>.*)</[SD]:.+>')
    # Written, as a string, under the 'version' key of a freshly created metadata cache.
    CACHE_VERSION = Version(1)
@classmethod
def is_webserver(cls, url):
    """Tell whether ``url`` has the shape of an SVN webserver URL.

    Returns True when ``URL_RE`` matches ``url`` and False otherwise.
    """
    return cls.URL_RE.match(url) is not None
def __init__(self, url, dev_branches=None, prod_branches=None, contributors=None, id=None, cache_path=None):
    """Set up the remote and load any revision cache already present on disk.

    Args:
        url: Repository URL; a trailing '/' is appended when missing.
        dev_branches, prod_branches, contributors: Forwarded unchanged to the base class.
        id: Repository id; defaults to the lower-cased last path component of ``url``.
        cache_path: Location of the JSON metadata cache. When omitted, a per-host file
            inside the system temporary directory is used.

    Raises:
        self.Exception: If ``url`` does not look like an SVN webserver URL.
    """
    # endswith() rather than url[-1]: an empty URL is then rejected by the check below
    # instead of raising IndexError.
    if not url.endswith('/'):
        url += '/'
    if not self.is_webserver(url):
        raise self.Exception("'{}' is not a valid SVN webserver".format(url))

    super(Svn, self).__init__(
        url,
        dev_branches=dev_branches, prod_branches=prod_branches,
        contributors=contributors,
        id=id or url.split('/')[-2].lower(),
    )

    if not cache_path:
        from webkitscmpy.mocks import remote
        host = 'svn.{}'.format(self.URL_RE.match(self.url).group('host'))
        # Hosts registered with the mock layer get their own cache directory.
        if host in remote.Svn.remotes:
            host = 'mock-{}'.format(host)
        cache_path = os.path.join(tempfile.gettempdir(), host, 'webkitscmpy-cache.json')
    self._cache_path = cache_path

    # Start from an empty cache; it is only replaced when a usable one is read from disk.
    self._metadata_cache = dict(version=str(self.CACHE_VERSION))
    if os.path.exists(self._cache_path):
        try:
            with self._cache_lock(), open(self._cache_path) as file:
                loaded = json.load(file)
            # The rest of the class indexes the cache by branch name, so anything other
            # than a JSON object is unusable.
            if isinstance(loaded, dict):
                self._metadata_cache = loaded
        except Exception:
            # Reading the cache is best-effort: an unreadable or corrupt file simply means
            # starting over. 'Exception' (not 'BaseException') so that KeyboardInterrupt
            # and SystemExit still propagate.
            self._metadata_cache = dict(version=str(self.CACHE_VERSION))
@property
def is_svn(self):
    """Always True for this class."""
    return True
@decorators.Memoize(timeout=60)
def _latest(self):
    """Ask the server for its youngest revision number.

    Sends an OPTIONS request to the repository URL and reads the 'SVN-Youngest-Rev'
    response header.

    Returns:
        The revision as an int, or None when the request does not return HTTP 200 or
        the header is missing or not a number.
    """
    response = requests.request(
        method='OPTIONS',
        url=self.url,
        headers={
            'Content-Type': 'text/xml',
            'Accept-Encoding': 'gzip',
            'DEPTH': '1',
        }, data='<?xml version="1.0" encoding="utf-8"?>\n'
                '<D:options xmlns:D="DAV:">\n'
                ' <D:activity-collection-set></D:activity-collection-set>\n'
                '</D:options>\n',
    )
    if response.status_code != 200:
        return None
    try:
        return int(response.headers.get('SVN-Youngest-Rev'))
    except (TypeError, ValueError):
        # TypeError: header absent (int(None)); ValueError: header is not numeric.
        # Callers already treat a falsy result as "could not determine the revision".
        return None
@decorators.Memoize(cached=False)
def info(self, branch=None, revision=None, tag=None):
    """Fetch 'svn info'-style metadata for a branch, tag or revision via PROPFIND.

    Args:
        branch: Branch name. A name without '/' other than the default branch is looked
            up under 'branches/'; any other name is used as a path as-is.
        revision: Revision to inspect. When omitted, the latest revision is used and
            ``branch`` defaults to the default branch.
        tag: Tag name, looked up under 'tags/'.

    Returns:
        A dict with the keys 'Last Changed Rev', 'Last Changed Author',
        'Last Changed Date' and 'Revision'; an empty dict when the server does not
        answer 200/207 or returns no response element; None when no revision was given
        and the latest revision could not be determined.

    Raises:
        ValueError: If ``tag`` is combined with ``branch`` or with ``revision``.
    """
    if tag and branch:
        raise ValueError('Cannot specify both branch and tag')
    if tag and revision:
        raise ValueError('Cannot specify both revision and tag')

    if not revision:
        branch = branch or self.default_branch
        revision = self._latest()
        if not revision:
            return None

    url = '{}!svn/rvr/{}'.format(self.url, revision)
    if branch and branch != self.default_branch and '/' not in branch:
        url = '{}/branches/{}'.format(url, branch)
    elif tag:
        url = '{}/tags/{}'.format(url, tag)
    elif branch:
        url = '{}/{}'.format(url, branch)

    response = requests.request(
        method='PROPFIND',
        url=url,
        headers={
            'Content-Type': 'text/xml',
            'Accept-Encoding': 'gzip',
            'DEPTH': '1',
        }, data='<propfind xmlns="DAV:">\n'
                ' <prop>\n'
                ' <resourcetype xmlns="DAV:"/>\n'
                ' <getcontentlength xmlns="DAV:"/>\n'
                ' <deadprop-count xmlns="http://subversion.tigris.org/xmlns/dav/"/>\n'
                ' <version-name xmlns="DAV:"/>\n'
                ' <creationdate xmlns="DAV:"/>\n'
                ' <creator-displayname xmlns="DAV:"/>\n'
                ' </prop>\n'
                '</propfind>\n',
    )
    if response.status_code not in [200, 207]:
        return {}

    parsed = xmltodict.parse(response.text)
    entries = parsed.get('D:multistatus', parsed).get('D:response', [])
    if not entries:
        return {}

    # xmltodict yields a list for repeated elements and a plain dict for a single one,
    # so both shapes have to be accepted at every level. Only the first entry is read.
    entry = entries[0] if isinstance(entries, list) else entries
    propstat = entry['D:propstat']
    if isinstance(propstat, list):
        propstat = propstat[0]
    props = propstat['D:prop']

    return {
        'Last Changed Rev': props['lp1:version-name'],
        'Last Changed Author': props.get('lp1:creator-displayname'),
        # 'YYYY-MM-DDTHH:MM:SS.ffffffZ' -> 'YYYY-MM-DD HH:MM:SS'
        'Last Changed Date': ' '.join(props['lp1:creationdate'].split('T')).split('.')[0],
        'Revision': revision,
    }
@property
def default_branch(self):
    """Name of the default branch; this class always reports 'trunk'."""
    return 'trunk'
def list(self, category):
    """List the entries directly under ``category`` at the latest revision.

    Args:
        category: Top-level repository directory to enumerate; this class calls it
            with 'branches' and 'tags'.

    Returns:
        A list of entry names relative to ``category`` (the directory itself is
        skipped). Empty when the latest revision cannot be determined or the server
        does not answer 200/207.
    """
    revision = self._latest()
    if not revision:
        return []

    response = requests.request(
        method='PROPFIND',
        url='{}!svn/rvr/{}/{}'.format(self.url, revision, category),
        headers={
            'Content-Type': 'text/xml',
            'Accept-Encoding': 'gzip',
            'DEPTH': '1',
        }, data='<?xml version="1.0" encoding="utf-8"?>\n'
                '<propfind xmlns="DAV:">\n'
                ' <prop><resourcetype xmlns="DAV:"/></prop>\n'
                '</propfind>\n',
    )
    if response.status_code not in [200, 207]:
        return []

    parsed = xmltodict.parse(response.text)
    entries = parsed.get('D:multistatus', parsed).get('D:response') or []
    # A lone <D:response> is parsed as a dict rather than a one-element list; iterating
    # it directly would walk its keys, so normalise to a list first.
    if not isinstance(entries, list):
        entries = [entries]

    prefix = '!svn/rvr/{}/{}/'.format(revision, category)
    results = []
    for entry in entries:
        candidate = entry['D:href'].split(prefix)[-1].rstrip('/')
        # The category directory itself reduces to an empty name and is dropped.
        if candidate:
            results.append(candidate)
    return results
@property
def branches(self):
    """Default branch followed by every entry listed under 'branches'."""
    names = [self.default_branch]
    return names + self.list('branches')
@property
def tags(self):
    """Entries listed under 'tags' at the latest revision."""
    return self.list('tags')
def _cache_lock(self):
    """Build the inter-process lock guarding the cache file.

    The lock file is named 'cache.lock' and sits in the same directory as the cache.
    """
    cache_directory = os.path.dirname(self._cache_path)
    lock_path = os.path.join(cache_directory, 'cache.lock')
    return fasteners.InterProcessLock(lock_path)
def _cache_revisions(self, branch=None):
    """Extend the cached revision list of ``branch`` from the server log and save the cache.

    Args:
        branch: Branch to refresh; defaults to the default branch.

    Returns:
        The (updated) list of revisions cached for ``branch``.

    Raises:
        self.Exception: If the log request does not return HTTP 200.
    """
    branch = branch or self.default_branch
    is_default_branch = branch == self.default_branch
    if branch not in self._metadata_cache:
        # The default branch's list is seeded with revision 0; other branches start empty.
        self._metadata_cache[branch] = [0] if is_default_branch else []
    # Every newly discovered revision is inserted at this fixed index (the end of the list
    # as it was before this call).
    pos = len(self._metadata_cache[branch])
    # If we aren't on the default branch, we will need the default branch to determine when
    # our branch intersects with the default branch.
    if not is_default_branch:
        self._cache_revisions(branch=self.default_branch)
    did_warn = False
    count = 0
    latest = self._latest()
    # The log is requested from the latest revision down to revision 0 and consumed line by
    # line as it streams in.
    with requests.request(
        method='REPORT',
        url='{}!svn/rvr/{}/{}'.format(
            self.url,
            latest,
            branch if is_default_branch or '/' in branch else 'branches/{}'.format(branch),
        ), stream=True,
        headers={
            'Content-Type': 'text/xml',
            'Accept-Encoding': 'gzip',
            'DEPTH': '1',
        }, data='<S:log-report xmlns:S="svn:">\n'
                '<S:start-revision>{revision}</S:start-revision>\n'
                '<S:end-revision>0</S:end-revision>\n'
                '<S:path></S:path>\n'
                '</S:log-report>\n'.format(revision=latest),
    ) as response:
        if response.status_code != 200:
            raise self.Exception("Failed to construct branch history for '{}'".format(branch))
        # Number of consecutive revisions seen that are also cached for the default branch.
        default_count = 0
        for line in response.iter_lines():
            match = self.DATA_RE.match(line)
            # Only the 'version-name' elements (revision numbers) are of interest.
            if not match or match.group('tag') != b'version-name':
                continue
            # Log a one-time notice once more than 1000 revisions have been read.
            if not did_warn:
                count += 1
                if count > 1000:
                    self.log('Caching commit data for {}, this will take a few minutes...'.format(branch))
                    did_warn = True
            revision = int(match.group('content'))
            # Stop once the newest revision that was already cached before this call is reached.
            if pos > 0 and self._metadata_cache[branch][pos - 1] == revision:
                break
            if not is_default_branch:
                if revision in self._metadata_cache[self.default_branch]:
                    # Only handle 2 sequential cross-branch commits
                    if default_count > 2:
                        break
                    default_count += 1
                else:
                    default_count = 0
            # Inserting each revision at the same index reverses the order in which they
            # arrive (presumably newest first, given the start/end revisions requested).
            self._metadata_cache[branch].insert(pos, revision)
    # Drop all but the last of the trailing default-branch revisions collected at the front
    # of the list.
    if default_count:
        self._metadata_cache[branch] = self._metadata_cache[branch][default_count - 1:]
    # NOTE(review): this compares the first cached element with the list [0]; the revisions
    # stored by this method are ints, so the condition looks like it can never be true --
    # confirm the intent before changing it.
    if self._metadata_cache[self.default_branch][0] == [0]:
        self._metadata_cache['identifier'] = len(self._metadata_cache[branch])
    # Persisting the cache is best-effort: a write failure is logged, not raised.
    try:
        if not os.path.isdir(os.path.dirname(self._cache_path)):
            os.makedirs(os.path.dirname(self._cache_path))
        with self._cache_lock(), open(self._cache_path, 'w') as file:
            json.dump(self._metadata_cache, file, indent=4)
    except (IOError, OSError):
        self.log("Failed to write SVN cache to '{}'".format(self._cache_path))
    return self._metadata_cache[branch]
def _branch_for(self, revision):
    """Derive the branch (or tag) a revision belongs to from the paths it changed.

    The changed paths of ``revision`` are requested from the server and reduced to
    their longest common prefix; the branch name is read off that prefix.

    Returns:
        The path component following '/branches/' when the prefix starts with
        '/branches', otherwise the first path component. When that component is
        'tags', the whole prefix without its leading '/' and trailing '/' is returned.

    Raises:
        self.Exception: If the server does not answer with HTTP 200.
    """
    response = requests.request(
        method='REPORT',
        url='{}!svn/rvr/{}'.format(self.url, revision),
        headers={
            'Content-Type': 'text/xml',
            'Accept-Encoding': 'gzip',
            'DEPTH': '1',
        }, data='<S:log-report xmlns:S="svn:">\n'
                '<S:start-revision>{revision}</S:start-revision>\n'
                '<S:end-revision>{revision}</S:end-revision>\n'
                '<S:limit>1</S:limit>\n'
                '<S:discover-changed-paths/>\n'
                '</S:log-report>\n'.format(revision=revision),
    )
    if response.status_code != 200:
        raise self.Exception("Failed to retrieve branch for '{}'".format(revision))

    log_item = xmltodict.parse(response.text)['S:log-report']['S:log-item']
    changed = (
        log_item.get('S:modified-path', []),
        log_item.get('S:added-path', []),
        log_item.get('S:deleted-path', []),
    )

    prefix = None
    for entries in changed:
        # A single changed path is parsed as one mapping instead of a list of them.
        if not isinstance(entries, list):
            entries = [entries]
        for entry in entries:
            path = entry['#text']
            if not prefix:
                prefix = path
            # Trim the running prefix until this path starts with it as well.
            while not path.startswith(prefix):
                prefix = prefix[:-1]

    components = prefix.split('/')
    candidate = components[2] if prefix.startswith('/branches') else components[1]

    # Tags are a unique case for SVN, because they're treated as branches in native SVN
    if candidate == 'tags':
        return prefix[1:].rstrip('/')
    return candidate
def _commit_count(self, revision=None, branch=None):
    """Return a commit count derived from the cached revision list of ``branch``.

    Args:
        revision: Revision to locate (int or numeric string). When falsy, the total
            for the branch is returned instead.
        branch: Branch whose cache is consulted; defaults to the default branch.

    Returns:
        With ``revision``: the index of that revision in the branch's cached list.
        Without ``revision`` on the default branch: the length of its cached list.
        Without ``revision`` on another branch: the default-branch index of the first
        revision cached for that branch.

    Raises:
        self.Exception: If ``revision`` is not in the cached list for ``branch``.
    """
    branch = branch or self.default_branch
    revisions = self._metadata_cache[branch]

    if revision:
        # Normalise before the membership test: the cache holds ints, so a numeric
        # string would otherwise never be found even though bisect accepted it.
        number = int(revision)
        if number not in revisions:
            raise self.Exception("Failed to find '{}' on '{}'".format(revision, branch))
        return bisect.bisect_left(revisions, number)

    if branch == self.default_branch:
        return len(revisions)
    return self._commit_count(revision=revisions[0], branch=self.default_branch)
def commit(self, hash=None, revision=None, identifier=None, branch=None, tag=None, include_log=True, include_identifier=True):
    """Build a Commit for an identifier, a revision, a tag or the tip of a branch.

    Exactly one way of selecting the commit is honoured, in this order: ``identifier``,
    then ``revision``, then ``tag``/``branch`` (tip of the default branch when neither
    is given).

    Args:
        hash: Not supported by SVN; any truthy value raises ValueError.
        revision: Revision to look up; cannot be combined with ``branch`` or ``tag``.
        identifier: Identifier to look up, optionally carrying a branch point and a
            branch; cannot be combined with ``revision`` or ``tag``.
        branch: Branch the identifier refers to, or branch whose tip is wanted.
        tag: Tag whose latest change is wanted.
        include_log: When true, fetch the commit message from the server.
        include_identifier: When true, compute identifier and branch point.

    Returns:
        A Commit describing the selected revision.

    Raises:
        ValueError: On conflicting arguments, or when the branch point embedded in
            ``identifier`` disagrees with the computed one.
        self.Exception: When the identifier, branch or tag cannot be resolved.
    """
    if hash:
        raise ValueError('SVN does not support Git hashes')

    parsed_branch_point = None
    if identifier is not None:
        if revision:
            raise ValueError('Cannot define both revision and identifier')
        if tag:
            raise ValueError('Cannot define both tag and identifier')

        parsed_branch_point, identifier, parsed_branch = Commit._parse_identifier(identifier, do_assert=True)
        if parsed_branch:
            if branch and branch != parsed_branch:
                raise ValueError(
                    "Caller passed both 'branch' and 'identifier', but specified different branches ({} and {})".format(
                        branch, parsed_branch,
                    ),
                )
            branch = parsed_branch
        branch = branch or self.default_branch

        if branch == self.default_branch and parsed_branch_point:
            raise self.Exception('Cannot provide a branch point for a commit on the default branch')

        # Refresh the cache when it is missing or too short to contain the identifier.
        if not self._metadata_cache.get(branch, []) or identifier >= len(self._metadata_cache.get(branch, [])):
            if branch != self.default_branch:
                self._cache_revisions(branch=self.default_branch)
            self._cache_revisions(branch=branch)
        # The identifier is used as an index into the cached list below, so an identifier
        # equal to the list length is already out of range ('>=' rather than '>').
        if identifier >= len(self._metadata_cache.get(branch, [])):
            raise self.Exception('Identifier {} cannot be found on the specified branch in the current checkout'.format(identifier))

        if identifier <= 0:
            if branch == self.default_branch:
                raise self.Exception('Illegal negative identifier on the default branch')
            # Non-positive identifiers on a branch count backwards from the branch point
            # along the default branch.
            identifier = self._commit_count(branch=branch) + identifier
            if identifier < 0:
                raise self.Exception('Identifier does not exist on the specified branch')
            branch = self.default_branch

        revision = self._metadata_cache[branch][identifier]
        info = self.info(cached=True, branch=branch, revision=revision)
        branch = self._branch_for(revision)
        if not self._metadata_cache.get(branch, []) or identifier >= len(self._metadata_cache.get(branch, [])):
            self._cache_revisions(branch=branch)

    elif revision:
        if branch:
            raise ValueError('Cannot define both branch and revision')
        if tag:
            raise ValueError('Cannot define both tag and revision')
        revision = Commit._parse_revision(revision, do_assert=True)
        branch = self._branch_for(revision) or self.default_branch
        info = self.info(cached=True, branch=branch, revision=revision)

    else:
        if branch and tag:
            raise ValueError('Cannot define both branch and tag')
        branch = None if tag else branch or self.default_branch
        info = self.info(tag=tag) if tag else self.info(branch=branch)
        if not info:
            raise self.Exception("'{}' is not a recognized {}".format(
                tag or branch,
                'tag' if tag else 'branch',
            ))
        revision = int(info['Last Changed Rev'])
        if branch != self.default_branch:
            branch = self._branch_for(revision)

    date = datetime.strptime(info['Last Changed Date'], '%Y-%m-%d %H:%M:%S') if info.get('Last Changed Date') else None

    if include_identifier and not identifier:
        if branch != self.default_branch and revision > self._metadata_cache.get(self.default_branch, [0])[-1]:
            self._cache_revisions(branch=self.default_branch)
        if revision not in self._metadata_cache.get(branch, []):
            self._cache_revisions(branch=branch)
        identifier = self._commit_count(revision=revision, branch=branch)

    branch_point = None if not include_identifier or branch == self.default_branch else self._commit_count(branch=branch)
    if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
        raise ValueError("Provided 'branch_point' does not match branch point of specified branch")

    response = requests.request(
        method='REPORT',
        url='{}!svn/rvr/{}'.format(self.url, revision),
        headers={
            'Content-Type': 'text/xml',
            'Accept-Encoding': 'gzip',
            'DEPTH': '1',
        }, data='<S:log-report xmlns:S="svn:">\n'
                '<S:start-revision>{revision}</S:start-revision>\n'
                '<S:end-revision>{revision}</S:end-revision>\n'
                '<S:limit>1</S:limit>\n'
                '</S:log-report>\n'.format(revision=revision),
    ) if include_log else None

    message = None
    if response is not None and response.status_code == 200:
        report = xmltodict.parse(response.text).get('S:log-report') or {}
        # A report without a log item is treated like an empty one instead of failing on
        # None.get(); the author then comes from the PROPFIND data gathered above.
        log_item = report.get('S:log-item') or {}
        name = log_item.get('D:creator-displayname') or info.get('Last Changed Author')
        message = log_item.get('D:comment', None)
    else:
        if include_log:
            self.log('Failed to connect to remote, cannot compute commit message')
        name = info.get('Last Changed Author')

    # An author name that looks like an e-mail address is used as both name and e-mail.
    author = self.contributors.create(name, name) if name and '@' in name else self.contributors.create(name)

    return Commit(
        repository_id=self.id,
        revision=int(revision),
        branch=branch,
        identifier=identifier if include_identifier else None,
        branch_point=branch_point,
        timestamp=int(calendar.timegm(date.timetuple())) if date else None,
        author=author,
        message=message,
    )
def _args_from_content(self, content, include_log=True):
    """Turn one '<S:log-item>' XML fragment into keyword arguments for Commit.

    Returns a dict with 'revision', 'author', 'timestamp' and 'message'; 'message' is
    None when ``include_log`` is false.
    """
    xml = xmltodict.parse(content)

    # Fractional seconds (everything after the first '.') are dropped before parsing.
    stamp = string_utils.decode(xml['S:log-item']['S:date']).split('.')[0]
    date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
    name = string_utils.decode(xml['S:log-item']['D:creator-displayname'])

    args = dict()
    args['revision'] = int(xml['S:log-item']['D:version-name'])
    if name and '@' in name:
        args['author'] = self.contributors.create(name, name)
    else:
        args['author'] = self.contributors.create(name)
    args['timestamp'] = int(calendar.timegm(date.timetuple()))
    if include_log:
        args['message'] = string_utils.decode(xml['S:log-item']['D:comment'])
    else:
        args['message'] = None
    return args
def commits(self, begin=None, end=None, include_log=True, include_identifier=True):
    """Yield the commits between ``end`` and ``begin``, in the order the server lists them.

    The log is requested from ``end.revision`` down to ``begin.revision`` on the branch
    of ``end`` and streamed; one Commit is yielded per '<S:log-item>' element.

    Args:
        begin, end: Range boundaries, resolved through ``self._commit_range``.
        include_log: When true, each Commit carries its message.
        include_identifier: When true, identifiers and branch points are derived by
            counting down from those of ``end``.

    Raises:
        self.Exception: If the log request does not return HTTP 200.
    """
    begin, end = self._commit_range(begin=begin, end=end, include_identifier=include_identifier)
    previous = end

    content = b''
    with requests.request(
        method='REPORT',
        url='{}!svn/rvr/{}/{}'.format(
            self.url,
            end.revision,
            end.branch if end.branch == self.default_branch or '/' in end.branch else 'branches/{}'.format(end.branch),
        ), stream=True,
        headers={
            'Content-Type': 'text/xml',
            'Accept-Encoding': 'gzip',
            'DEPTH': '1',
        }, data='<S:log-report xmlns:S="svn:">\n'
                '<S:start-revision>{end}</S:start-revision>\n'
                '<S:end-revision>{begin}</S:end-revision>\n'
                '<S:path></S:path>\n'
                '</S:log-report>\n'.format(end=end.revision, begin=begin.revision),
    ) as response:
        if response.status_code != 200:
            # 'end.branch': there is no local named 'branch' in this method.
            raise self.Exception("Failed to construct branch history for '{}'".format(end.branch))

        for line in response.iter_lines():
            # Collect the lines of one log item; anything gathered before an opening tag
            # is discarded when the opening tag restarts the buffer.
            if line == b'<S:log-item>':
                content = line + b'\n'
            else:
                content += line + b'\n'
            if line != b'</S:log-item>':
                continue

            args = self._args_from_content(content, include_log=include_log)

            branch_point = previous.branch_point if include_identifier else None
            identifier = previous.identifier if include_identifier else None
            # Only count down when there is an identifier to count from; it is None when
            # identifiers were not requested.
            if identifier is not None and args['revision'] != previous.revision:
                identifier -= 1
            # Reaching 0 on a branch means the branch point was hit: continue on the
            # default branch, numbered from the branch point.
            if not identifier:
                identifier = branch_point
                branch_point = None

            previous = Commit(
                repository_id=self.id,
                branch=end.branch if branch_point else self.default_branch,
                identifier=identifier,
                branch_point=branch_point,
                **args
            )
            yield previous
            content = b''