Tools/Scripts/webkitpy/w3c/test_parser.py - WebKit - Git at Google

 #!/usr/bin/env python

 # Copyright (C) 2013 Adobe Systems Incorporated. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 #
 # 1. Redistributions of source code must retain the above
 #    copyright notice, this list of conditions and the following
 #    disclaimer.
 # 2. Redistributions in binary form must reproduce the above
 #    copyright notice, this list of conditions and the following
 #    disclaimer in the documentation and/or other materials
 #    provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 # OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.

 import logging
 import re

 from collections import deque

 from webkitpy.common.host import Host
 from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup as Parser


 _log = logging.getLogger(__name__)


 class TestParser(object):

     def __init__(self, options, filename, host=Host(), source_root_directory=None):
         self.options = options
         self.filename = filename
         self.host = host
         self.filesystem = self.host.filesystem
         self.source_root_directory = source_root_directory

         self.test_doc = None
         self.ref_doc = None
         self.load_file(filename)

     def load_file(self, filename, is_ref=False):
         if self.filesystem.isfile(filename):
             try:
                 doc = Parser(self.filesystem.read_binary_file(filename))
             except:
                 # FIXME: Figure out what to do if we can't parse the file.
                 _log.error("Failed to parse %s", filename)
                 doc = None
         else:
             if self.filesystem.isdir(filename):
                 # FIXME: Figure out what is triggering this and what to do about it.
                 _log.error("Trying to load %s, which is a directory", filename)
             doc = None

         if is_ref:
             self.ref_doc = doc
         else:
             self.test_doc = doc

     def analyze_test(self, test_contents=None, ref_contents=None):
         """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

         test_info = None

         if test_contents is None and self.test_doc is None:
             return test_info
         if test_contents is not None:
             self.test_doc = Parser(test_contents)
         if ref_contents is not None:
             self.ref_doc = Parser(ref_contents)

         matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')

         # Manual tests may also have properties that make them look like non-manual reference or JS
         # tests, so exclude them first.
         if self.is_wpt_manualtest() and not self.is_reference_filename():
             test_info = {'test': self.filename, 'manualtest': True}
         elif matches:
             if len(matches) > 1:
                 # FIXME: Is this actually true? We should fix this.
                 _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                              self.filesystem.basename(self.filename))

             try:
                 href_match_file = matches[0]['href'].strip()
                 if href_match_file.startswith('/'):
                     ref_file = self.filesystem.join(self.source_root_directory, href_match_file.lstrip('/'))
                 else:
                     ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), href_match_file)

                 reference_type = matches[0]['rel'][0] if isinstance(matches[0]['rel'], list) else matches[0]['rel']
             except KeyError as e:
                 # FIXME: Figure out what to do w/ invalid test files.
                 _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                 return None

             if (ref_file == self.filename):
                 return {'referencefile': self.filename}

             if self.ref_doc is None:
                 self.load_file(ref_file, True)

             test_info = {'test': self.filename, 'reference': ref_file, 'type': reference_type}

             # If the ref file does not live in the same directory as the test file, check it for support files
             test_info['reference_support_info'] = {}
             if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                 reference_support_files = self.support_files(self.ref_doc)
                 if len(reference_support_files) > 0:
                     reference_relpath = self.filesystem.relpath(self.filesystem.dirname(self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                     test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

         elif self.is_jstest():
             test_info = {'test': self.filename, 'jstest': True}
         elif self.is_reference_filename():
             test_info = {'referencefile': self.filename}
         elif self.options['all'] is True:
             test_info = {'test': self.filename}

         if test_info and self.is_slow_test():
             test_info['slow'] = True

         if test_info:
             test_info['fuzzy'] = self.fuzzy_metadata()

         return test_info

     def reference_links_of_type(self, reftest_type):
         return self.test_doc.findAll(rel=reftest_type)

     def is_jstest(self):
         """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
         return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

     def is_wpt_manualtest(self):
         """Returns whether the test is a manual test according WPT rules."""
         # General rule for manual test i.e. file ends with -manual.htm path
         # See https://web-platform-tests.org/writing-tests/manual.html#requirements-for-a-manual-test
         if self.filename.find('-manual.') != -1:
             return True

         # Rule specific to CSS WG manual tests i.e. rely on <meta name="flags">
         # See https://web-platform-tests.org/writing-tests/css-metadata.html#requirement-flags
         # For further details and discussions, see the following links:
         # https://github.com/web-platform-tests/wpt/issues/5381
         # https://github.com/web-platform-tests/wpt/issues/5293
         for match in self.test_doc.findAll(name='meta', attrs={'name': 'flags', 'content': True}):
             css_flags = set(match['content'].split())
             if bool(css_flags & {"animated", "font", "history", "interact", "paged", "speech", "userstyle"}):
                 return True

         return False

     def is_reference_filename(self):
         # From tools/manifest/sourcefile.py in WPT repository
         # https://github.com/web-platform-tests/wpt/blob/22f29564bb82b407aeaf6507c8efffdbd51b9974/tools/manifest/sourcefile.py#L405
         reference_file_re = re.compile(r'(^|[\-_])(not)?ref[0-9]*([\-_]|$)')
         return "/reference/" in self.filename or bool(reference_file_re.search(self.filename))

     def is_slow_test(self):
         return any([match.name == 'meta' and match['name'] == 'timeout' for match in self.test_doc.findAll(content='long')])

     def has_fuzzy_metadata(self):
         return any([match['name'] == 'fuzzy' for match in self.test_doc.findAll('meta')])

     def fuzzy_metadata(self):
         fuzzy_nodes = self.test_doc.findAll('meta', attrs={"name": "fuzzy"})
         if not fuzzy_nodes:
             return None

         args = [u"maxDifference", u"totalPixels"]
         result = {}

         # Taken from wpt/tools/manifest/sourcefile.py, and copied to avoid having webkitpy depend on wpt.
         for node in fuzzy_nodes:
             content = node['content']
             key = None
             # from parse_ref_keyed_meta; splits out the optional reference prefix.
             parts = content.rsplit(u":", 1)
             if len(parts) == 1:
                 fuzzy_data = parts[0]
             else:
                 ref_file = parts[0]
                 key = ref_file
                 fuzzy_data = parts[1]

             ranges = fuzzy_data.split(u";")
             if len(ranges) != 2:
                 raise ValueError("Malformed fuzzy value %s" % value)

             arg_values = {}  # type: Dict[Text, List[int]]
             positional_args = deque()  # type: Deque[List[int]]

             for range_str_value in ranges:  # type: Text
                 name = None  # type: Optional[Text]
                 if u"=" in range_str_value:
                     name, range_str_value = [part.strip() for part in range_str_value.split(u"=", 1)]
                     if name not in args:
                         raise ValueError("%s is not a valid fuzzy property" % name)
                     if arg_values.get(name):
                         raise ValueError("Got multiple values for argument %s" % name)

                 if u"-" in range_str_value:
                     range_min, range_max = range_str_value.split(u"-")
                 else:
                     range_min = range_str_value
                     range_max = range_str_value
                 try:
                     range_value = [int(x.strip()) for x in (range_min, range_max)]
                 except ValueError:
                     raise ValueError("Fuzzy value %s must be a range of integers" % range_str_value)

                 if name is None:
                     positional_args.append(range_value)
                 else:
                     arg_values[name] = range_value

             result[key] = []
             for arg_name in args:
                 if arg_values.get(arg_name):
                     arg_value = arg_values.pop(arg_name)
                 else:
                     arg_value = positional_args.popleft()
                 result[key].append(arg_value)
             assert len(arg_values) == 0 and len(positional_args) == 0

         return result

     def support_files(self, doc):
         """ Searches the file for all paths specified in url()'s, href or src attributes."""
         support_files = []

         if doc is None:
             return support_files

         elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
         elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

         url_pattern = re.compile(r'url\(.*\)')
         urls = []
         for url in doc.findAll(text=url_pattern):
             for url in re.findall(url_pattern, url):
                 url = re.sub('url\\([\'\"]?', '', url)
                 url = re.sub('[\'\"]?\\)', '', url)
                 urls.append(url)

         src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
         href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

         paths = src_paths + href_paths + urls
         for path in paths:
             uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
             if not uri_scheme_pattern.match(path):
                 support_files.append(path)

         return support_files
	#!/usr/bin/env python

	# Copyright (C) 2013 Adobe Systems Incorporated. All rights reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions
	# are met:
	#
	# 1. Redistributions of source code must retain the above
	# copyright notice, this list of conditions and the following
	# disclaimer.
	# 2. Redistributions in binary form must reproduce the above
	# copyright notice, this list of conditions and the following
	# disclaimer in the documentation and/or other materials
	# provided with the distribution.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
	# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
	# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
	# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
	# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
	# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	# SUCH DAMAGE.

	import logging
	import re

	from collections import deque

	from webkitpy.common.host import Host
	from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup as Parser


	_log = logging.getLogger(__name__)


	class TestParser(object):

	def __init__(self, options, filename, host=Host(), source_root_directory=None):
	self.options = options
	self.filename = filename
	self.host = host
	self.filesystem = self.host.filesystem
	self.source_root_directory = source_root_directory

	self.test_doc = None
	self.ref_doc = None
	self.load_file(filename)

	def load_file(self, filename, is_ref=False):
	if self.filesystem.isfile(filename):
	try:
	doc = Parser(self.filesystem.read_binary_file(filename))
	except:
	# FIXME: Figure out what to do if we can't parse the file.
	_log.error("Failed to parse %s", filename)
	doc = None
	else:
	if self.filesystem.isdir(filename):
	# FIXME: Figure out what is triggering this and what to do about it.
	_log.error("Trying to load %s, which is a directory", filename)
	doc = None

	if is_ref:
	self.ref_doc = doc
	else:
	self.test_doc = doc

	def analyze_test(self, test_contents=None, ref_contents=None):
	""" Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

	test_info = None

	if test_contents is None and self.test_doc is None:
	return test_info
	if test_contents is not None:
	self.test_doc = Parser(test_contents)
	if ref_contents is not None:
	self.ref_doc = Parser(ref_contents)

	matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')

	# Manual tests may also have properties that make them look like non-manual reference or JS
	# tests, so exclude them first.
	if self.is_wpt_manualtest() and not self.is_reference_filename():
	test_info = {'test': self.filename, 'manualtest': True}
	elif matches:
	if len(matches) > 1:
	# FIXME: Is this actually true? We should fix this.
	_log.warning('Multiple references are not supported. Importing the first ref defined in %s',
	self.filesystem.basename(self.filename))

	try:
	href_match_file = matches[0]['href'].strip()
	if href_match_file.startswith('/'):
	ref_file = self.filesystem.join(self.source_root_directory, href_match_file.lstrip('/'))
	else:
	ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), href_match_file)

	reference_type = matches[0]['rel'][0] if isinstance(matches[0]['rel'], list) else matches[0]['rel']
	except KeyError as e:
	# FIXME: Figure out what to do w/ invalid test files.
	_log.error('%s has a reference link but is missing the "href"', self.filesystem)
	return None

	if (ref_file == self.filename):
	return {'referencefile': self.filename}

	if self.ref_doc is None:
	self.load_file(ref_file, True)

	test_info = {'test': self.filename, 'reference': ref_file, 'type': reference_type}

	# If the ref file does not live in the same directory as the test file, check it for support files
	test_info['reference_support_info'] = {}
	if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
	reference_support_files = self.support_files(self.ref_doc)
	if len(reference_support_files) > 0:
	reference_relpath = self.filesystem.relpath(self.filesystem.dirname(self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
	test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

	elif self.is_jstest():
	test_info = {'test': self.filename, 'jstest': True}
	elif self.is_reference_filename():
	test_info = {'referencefile': self.filename}
	elif self.options['all'] is True:
	test_info = {'test': self.filename}

	if test_info and self.is_slow_test():
	test_info['slow'] = True

	if test_info:
	test_info['fuzzy'] = self.fuzzy_metadata()

	return test_info

	def reference_links_of_type(self, reftest_type):
	return self.test_doc.findAll(rel=reftest_type)

	def is_jstest(self):
	"""Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
	return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

	def is_wpt_manualtest(self):
	"""Returns whether the test is a manual test according WPT rules."""
	# General rule for manual test i.e. file ends with -manual.htm path
	# See https://web-platform-tests.org/writing-tests/manual.html#requirements-for-a-manual-test
	if self.filename.find('-manual.') != -1:
	return True

	# Rule specific to CSS WG manual tests i.e. rely on <meta name="flags">
	# See https://web-platform-tests.org/writing-tests/css-metadata.html#requirement-flags
	# For further details and discussions, see the following links:
	# https://github.com/web-platform-tests/wpt/issues/5381
	# https://github.com/web-platform-tests/wpt/issues/5293
	for match in self.test_doc.findAll(name='meta', attrs={'name': 'flags', 'content': True}):
	css_flags = set(match['content'].split())
	if bool(css_flags & {"animated", "font", "history", "interact", "paged", "speech", "userstyle"}):
	return True

	return False

	def is_reference_filename(self):
	# From tools/manifest/sourcefile.py in WPT repository
	# https://github.com/web-platform-tests/wpt/blob/22f29564bb82b407aeaf6507c8efffdbd51b9974/tools/manifest/sourcefile.py#L405
	reference_file_re = re.compile(r'(^\|[\-_])(not)?ref[0-9]*([\-_]\|$)')
	return "/reference/" in self.filename or bool(reference_file_re.search(self.filename))

	def is_slow_test(self):
	return any([match.name == 'meta' and match['name'] == 'timeout' for match in self.test_doc.findAll(content='long')])

	def has_fuzzy_metadata(self):
	return any([match['name'] == 'fuzzy' for match in self.test_doc.findAll('meta')])

	def fuzzy_metadata(self):
	fuzzy_nodes = self.test_doc.findAll('meta', attrs={"name": "fuzzy"})
	if not fuzzy_nodes:
	return None

	args = [u"maxDifference", u"totalPixels"]
	result = {}

	# Taken from wpt/tools/manifest/sourcefile.py, and copied to avoid having webkitpy depend on wpt.
	for node in fuzzy_nodes:
	content = node['content']
	key = None
	# from parse_ref_keyed_meta; splits out the optional reference prefix.
	parts = content.rsplit(u":", 1)
	if len(parts) == 1:
	fuzzy_data = parts[0]
	else:
	ref_file = parts[0]
	key = ref_file
	fuzzy_data = parts[1]

	ranges = fuzzy_data.split(u";")
	if len(ranges) != 2:
	raise ValueError("Malformed fuzzy value %s" % value)

	arg_values = {} # type: Dict[Text, List[int]]
	positional_args = deque() # type: Deque[List[int]]

	for range_str_value in ranges: # type: Text
	name = None # type: Optional[Text]
	if u"=" in range_str_value:
	name, range_str_value = [part.strip() for part in range_str_value.split(u"=", 1)]
	if name not in args:
	raise ValueError("%s is not a valid fuzzy property" % name)
	if arg_values.get(name):
	raise ValueError("Got multiple values for argument %s" % name)

	if u"-" in range_str_value:
	range_min, range_max = range_str_value.split(u"-")
	else:
	range_min = range_str_value
	range_max = range_str_value
	try:
	range_value = [int(x.strip()) for x in (range_min, range_max)]
	except ValueError:
	raise ValueError("Fuzzy value %s must be a range of integers" % range_str_value)

	if name is None:
	positional_args.append(range_value)
	else:
	arg_values[name] = range_value

	result[key] = []
	for arg_name in args:
	if arg_values.get(arg_name):
	arg_value = arg_values.pop(arg_name)
	else:
	arg_value = positional_args.popleft()
	result[key].append(arg_value)
	assert len(arg_values) == 0 and len(positional_args) == 0

	return result

	def support_files(self, doc):
	""" Searches the file for all paths specified in url()'s, href or src attributes."""
	support_files = []

	if doc is None:
	return support_files

	elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
	elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

	url_pattern = re.compile(r'url\(.*\)')
	urls = []
	for url in doc.findAll(text=url_pattern):
	for url in re.findall(url_pattern, url):
	url = re.sub('url\\([\'\"]?', '', url)
	url = re.sub('[\'\"]?\\)', '', url)
	urls.append(url)

	src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
	href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

	paths = src_paths + href_paths + urls
	for path in paths:
	uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
	if not uri_scheme_pattern.match(path):
	support_files.append(path)

	return support_files