blob: 816fb1144e478a49583f270de0670f015306be80 [file] [log] [blame]
#!/usr/bin/env python
# Copyright (C) 2013 Adobe Systems Incorporated. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
import logging
import re
from collections import deque
from webkitpy.common.host import Host
from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup as Parser
_log = logging.getLogger(__name__)
class TestParser(object):
def __init__(self, options, filename, host=Host(), source_root_directory=None):
self.options = options
self.filename = filename
self.host = host
self.filesystem = self.host.filesystem
self.source_root_directory = source_root_directory
self.test_doc = None
self.ref_doc = None
self.load_file(filename)
def load_file(self, filename, is_ref=False):
if self.filesystem.isfile(filename):
try:
doc = Parser(self.filesystem.read_binary_file(filename))
except:
# FIXME: Figure out what to do if we can't parse the file.
_log.error("Failed to parse %s", filename)
doc = None
else:
if self.filesystem.isdir(filename):
# FIXME: Figure out what is triggering this and what to do about it.
_log.error("Trying to load %s, which is a directory", filename)
doc = None
if is_ref:
self.ref_doc = doc
else:
self.test_doc = doc
def analyze_test(self, test_contents=None, ref_contents=None):
""" Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """
test_info = None
if test_contents is None and self.test_doc is None:
return test_info
if test_contents is not None:
self.test_doc = Parser(test_contents)
if ref_contents is not None:
self.ref_doc = Parser(ref_contents)
matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
# Manual tests may also have properties that make them look like non-manual reference or JS
# tests, so exclude them first.
if self.is_wpt_manualtest() and not self.is_reference_filename():
test_info = {'test': self.filename, 'manualtest': True}
elif matches:
if len(matches) > 1:
# FIXME: Is this actually true? We should fix this.
_log.warning('Multiple references are not supported. Importing the first ref defined in %s',
self.filesystem.basename(self.filename))
try:
href_match_file = matches[0]['href'].strip()
if href_match_file.startswith('/'):
ref_file = self.filesystem.join(self.source_root_directory, href_match_file.lstrip('/'))
else:
ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), href_match_file)
reference_type = matches[0]['rel'][0] if isinstance(matches[0]['rel'], list) else matches[0]['rel']
except KeyError as e:
# FIXME: Figure out what to do w/ invalid test files.
_log.error('%s has a reference link but is missing the "href"', self.filesystem)
return None
if (ref_file == self.filename):
return {'referencefile': self.filename}
if self.ref_doc is None:
self.load_file(ref_file, True)
test_info = {'test': self.filename, 'reference': ref_file, 'type': reference_type}
# If the ref file does not live in the same directory as the test file, check it for support files
test_info['reference_support_info'] = {}
if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
reference_support_files = self.support_files(self.ref_doc)
if len(reference_support_files) > 0:
reference_relpath = self.filesystem.relpath(self.filesystem.dirname(self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}
elif self.is_jstest():
test_info = {'test': self.filename, 'jstest': True}
elif self.is_reference_filename():
test_info = {'referencefile': self.filename}
elif self.options['all'] is True:
test_info = {'test': self.filename}
if test_info and self.is_slow_test():
test_info['slow'] = True
if test_info:
test_info['fuzzy'] = self.fuzzy_metadata()
return test_info
def reference_links_of_type(self, reftest_type):
return self.test_doc.findAll(rel=reftest_type)
def is_jstest(self):
"""Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))
def is_wpt_manualtest(self):
"""Returns whether the test is a manual test according WPT rules."""
# General rule for manual test i.e. file ends with -manual.htm path
# See https://web-platform-tests.org/writing-tests/manual.html#requirements-for-a-manual-test
if self.filename.find('-manual.') != -1:
return True
# Rule specific to CSS WG manual tests i.e. rely on <meta name="flags">
# See https://web-platform-tests.org/writing-tests/css-metadata.html#requirement-flags
# For further details and discussions, see the following links:
# https://github.com/web-platform-tests/wpt/issues/5381
# https://github.com/web-platform-tests/wpt/issues/5293
for match in self.test_doc.findAll(name='meta', attrs={'name': 'flags', 'content': True}):
css_flags = set(match['content'].split())
if bool(css_flags & {"animated", "font", "history", "interact", "paged", "speech", "userstyle"}):
return True
return False
def is_reference_filename(self):
# From tools/manifest/sourcefile.py in WPT repository
# https://github.com/web-platform-tests/wpt/blob/22f29564bb82b407aeaf6507c8efffdbd51b9974/tools/manifest/sourcefile.py#L405
reference_file_re = re.compile(r'(^|[\-_])(not)?ref[0-9]*([\-_]|$)')
return "/reference/" in self.filename or bool(reference_file_re.search(self.filename))
def is_slow_test(self):
return any([match.name == 'meta' and match['name'] == 'timeout' for match in self.test_doc.findAll(content='long')])
def has_fuzzy_metadata(self):
return any([match['name'] == 'fuzzy' for match in self.test_doc.findAll('meta')])
def fuzzy_metadata(self):
fuzzy_nodes = self.test_doc.findAll('meta', attrs={"name": "fuzzy"})
if not fuzzy_nodes:
return None
args = [u"maxDifference", u"totalPixels"]
result = {}
# Taken from wpt/tools/manifest/sourcefile.py, and copied to avoid having webkitpy depend on wpt.
for node in fuzzy_nodes:
content = node['content']
key = None
# from parse_ref_keyed_meta; splits out the optional reference prefix.
parts = content.rsplit(u":", 1)
if len(parts) == 1:
fuzzy_data = parts[0]
else:
ref_file = parts[0]
key = ref_file
fuzzy_data = parts[1]
ranges = fuzzy_data.split(u";")
if len(ranges) != 2:
raise ValueError("Malformed fuzzy value %s" % value)
arg_values = {} # type: Dict[Text, List[int]]
positional_args = deque() # type: Deque[List[int]]
for range_str_value in ranges: # type: Text
name = None # type: Optional[Text]
if u"=" in range_str_value:
name, range_str_value = [part.strip() for part in range_str_value.split(u"=", 1)]
if name not in args:
raise ValueError("%s is not a valid fuzzy property" % name)
if arg_values.get(name):
raise ValueError("Got multiple values for argument %s" % name)
if u"-" in range_str_value:
range_min, range_max = range_str_value.split(u"-")
else:
range_min = range_str_value
range_max = range_str_value
try:
range_value = [int(x.strip()) for x in (range_min, range_max)]
except ValueError:
raise ValueError("Fuzzy value %s must be a range of integers" % range_str_value)
if name is None:
positional_args.append(range_value)
else:
arg_values[name] = range_value
result[key] = []
for arg_name in args:
if arg_values.get(arg_name):
arg_value = arg_values.pop(arg_name)
else:
arg_value = positional_args.popleft()
result[key].append(arg_value)
assert len(arg_values) == 0 and len(positional_args) == 0
return result
def support_files(self, doc):
""" Searches the file for all paths specified in url()'s, href or src attributes."""
support_files = []
if doc is None:
return support_files
elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
elements_with_href_attributes = doc.findAll(href=re.compile('.*'))
url_pattern = re.compile(r'url\(.*\)')
urls = []
for url in doc.findAll(text=url_pattern):
for url in re.findall(url_pattern, url):
url = re.sub('url\\([\'\"]?', '', url)
url = re.sub('[\'\"]?\\)', '', url)
urls.append(url)
src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]
paths = src_paths + href_paths + urls
for path in paths:
uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
if not uri_scheme_pattern.match(path):
support_files.append(path)
return support_files