blob: 5769dce87d9a9d7c56ed220cb16ac9deb61881ea [file] [log] [blame]
#!/usr/bin/env ruby
# Copyright (C) 2015 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
require "pathname"
$varIndex = 0
def newVar
$varIndex += 1
"_tmp#{$varIndex}"
end
class Production
attr_reader :origin, :name, :varName, :anonymous, :opcode, :children
def initialize(origin, name, opcode, children)
@origin = origin
@name = name
@anonymous = (name == nil or name =~ /\A\$([0-9]+)\Z/)
@varName = @anonymous ? newVar : name =~ /\A\$/ ? $' : name
@opcode = opcode
@children = children ? children : []
end
def hasOpcode
opcode != nil
end
def eachVariable(&proc)
children.each_with_index {
| child, index |
yield varName,
child.name,
child.varName,
child.anonymous ? (child.opcode ? :anonInternal : :anonOperand) : (child.opcode ? :internal : :operand),
index
child.eachVariable(&proc)
}
end
end
class Matcher
attr_reader :name, :productions
def initialize(name, productions)
@name = name
@productions = productions
end
end
class Origin
attr_reader :fileName, :lineNumber
def initialize(fileName, lineNumber)
@fileName = fileName
@lineNumber = lineNumber
end
def to_s
"#{fileName}:#{lineNumber}"
end
end
class Token
attr_reader :origin, :string
def initialize(origin, string)
@origin = origin
@string = string
end
def ==(other)
if other.is_a? Token
@string == other.string
else
@string == other
end
end
def =~(other)
@string =~ other
end
def to_s
"#{@string.inspect} at #{origin}"
end
def parseError(*comment)
if comment.empty?
raise "Parse error: #{to_s}"
else
raise "Parse error: #{to_s}: #{comment[0]}"
end
end
end
def lex(str, fileName)
fileName = Pathname.new(fileName)
result = []
lineNumber = 1
while not str.empty?
case str
when /\A\#([^\n]*)/
# comment, ignore
when /\A\n/
# newline, ignore
lineNumber += 1
when /\A[a-zA-Z0-9$]([a-zA-Z0-9_]*)/
result << Token.new(Origin.new(fileName, lineNumber), $&)
when /\A([ \t\r]+)/
# whitespace, ignore
when /\A[=(),]/
result << Token.new(Origin.new(fileName, lineNumber), $&)
else
raise "Lexer error at #{Origin.new(fileName, lineNumber).to_s}, unexpected sequence #{str[0..20].inspect}"
end
str = $~.post_match
end
result
end
def isKeyword(token)
# We can extend this as we add more keywords. Like, we might want "include" eventually.
token =~ /\A((matcher)|(include))\Z/
end
def isIdentifier(token)
token =~ /\A[a-zA-Z0-9$]([a-zA-Z0-9_]*)\Z/ and not isKeyword(token)
end
class Parser
def initialize(data, fileName)
@tokens = lex(data, fileName)
@idx = 0
end
def token
@tokens[@idx]
end
def advance
@idx += 1
end
def parseError(*comment)
if token
token.parseError(*comment)
else
if comment.empty?
raise "Parse error at end of file"
else
raise "Parse error at end of file: #{comment[0]}"
end
end
end
def consume(string)
parseError("Expected #{string}") unless token == string
advance
end
def consumeIdentifier
result = token.string
parseError("Expected identifier") unless isIdentifier(result)
advance
result
end
def addUnique(name, productions, production)
if name == production.varName
parseError("Child production has same name as parent production")
end
if productions.detect { | entry | entry.name == production.name }
parseError("Duplicate production")
end
productions << production
end
def parse
consume("matcher")
matcherName = consumeIdentifier
productions = []
loop {
break if @idx >= @tokens.length
# We might want to support "include". If we did that, it would go here.
production = parseProduction
if productions.detect { | entry | entry.name == production.name }
parseError("Duplicate production")
end
productions << production
}
Matcher.new(matcherName, productions)
end
def parseProduction
origin = token.origin
name = consumeIdentifier
if token == "="
consume("=")
opcode = consumeIdentifier
consume("(")
elsif token == "("
consume("(")
opcode = name
name = nil
else
return Production.new(origin, name, nil, nil)
end
productions = []
loop {
break if token == ")"
addUnique(name, productions, parseProduction)
break if token == ")"
consume(",")
}
# Advance after the ")"
advance
Production.new(origin, name, opcode, productions)
end
end
fileName = ARGV[0]
parser = Parser.new(IO::read(fileName), fileName)
matcher = parser.parse
class SubMatch
attr_reader :indexMap, :productions
def initialize(indexList, productions)
@indexMap = []
@productions = []
indexList.each {
| index |
@indexMap << index
@productions << productions[index]
}
end
def mapIndexList(indexList)
indexList.map {
| index |
@indexMap[index]
}
end
end
$stderr.puts "Generating code for #{fileName}."
File.open("B3" + matcher.name + ".h", "w") {
| outp |
outp.puts "// Generated by generate_pattern_matcher.rb from #{fileName} -- do not edit!"
outp.puts "#ifndef B3#{matcher.name}_h"
outp.puts "#define B3#{matcher.name}_h"
outp.puts "#include \"B3Value.h\""
outp.puts "namespace JSC { namespace B3 {"
outp.puts "template<typename Adaptor>"
outp.puts "bool run#{matcher.name}(Value* _value, Adaptor& adaptor)"
outp.puts "{"
# Note that this does not emit properly indented code, because it's a recursive emitter. If you
# want to see the code nicely formatted, open it in a good text editor and ask it to format it
# for you.
# In odd situations, we may not use the input value. Tell the compiler to chill out about it.
outp.puts "UNUSED_PARAM(_value);"
# This just protects the caller from having to worry about productions having different lengths.
def matchLength(outp, valueName, productions)
indexLists = []
productions.each_with_index {
| production, index |
if indexLists[-1] and productions[indexLists[-1][0]].children.length == production.children.length
indexLists[-1] << index
else
indexLists << [index]
end
}
indexLists.each {
| indexList |
length = productions[indexList[0]].children.length
if length > 0
outp.puts "if (#{valueName}->children().size() >= #{length}) {"
yield indexList
outp.puts "}"
else
yield indexList
end
}
end
# This efficiently selects from the given set of productions. It assumes that productions
# are not duplicated. It yields the index of the found production, and the coroutine is
# responsible for emitting specific code for that production. The coroutine is given lists of
# indices of possibly-matching productions.
def matchProductions(outp, valueName, productions)
# First, split the productions list into runs of productions with an opcode and productions
# without one.
indexLists = []
productions.each_with_index {
| production, index |
if indexLists[-1] and productions[indexLists[-1][0]].hasOpcode == production.hasOpcode
indexLists[-1] << index
else
indexLists << [index]
end
}
# Now, emit pattern matching code. We do it differently for lists with an opcode than for
# lists without one.
indexLists.each {
| indexList |
if productions[indexList[0]].hasOpcode
# Now, we split this index list into groups for each opcode.
groups = {}
indexList.each {
| index |
production = productions[index]
if groups[production.opcode]
groups[production.opcode] << index
else
groups[production.opcode] = [index]
end
}
# Emit a switch statement.
outp.puts "switch (#{valueName}->opcode()) {"
groups.each_pair {
| opcode, subIndexList |
outp.puts "case #{opcode}:"
subMatch = SubMatch.new(subIndexList, productions)
matchLength(outp, valueName, subMatch.productions) {
| indexList |
yield subMatch.mapIndexList(indexList)
}
outp.puts "break;"
}
outp.puts "default:"
outp.puts "break;"
outp.puts "}"
else
subMatch = SubMatch.new(indexList, productions)
matchLength(outp, valueName, subMatch.productions) {
| indexList |
yield subMatch.mapIndexList(indexList)
}
end
}
end
# Takes productions that have the same opcode and the same length and selects from them based on
# the productions at the given column.
def matchColumn(outp, valueName, productions, columnIndex)
if columnIndex >= productions[0].children.length
yield (0...(productions.size)).to_a
return
end
subValue = newVar
outp.puts "{"
outp.puts "Value* #{subValue} = #{valueName}->child(#{columnIndex});"
# We may not use this value. Tell the compiler to chill out about it.
outp.puts "UNUSED_PARAM(#{subValue});"
subProductions = productions.map {
| production |
production.children[columnIndex]
}
matchRecursively(outp, subValue, subProductions) {
| indexList |
subMatch = SubMatch.new(indexList, productions)
matchColumn(outp, valueName, subMatch.productions, columnIndex + 1) {
| indexList |
yield subMatch.mapIndexList(indexList)
}
}
outp.puts "}"
end
def matchRecursively(outp, valueName, productions)
matchProductions(outp, valueName, productions) {
| indexList |
# We are guaranteed that this index list contains productions with the same opcode and
# the same length.
subMatch = SubMatch.new(indexList, productions)
matchColumn(outp, valueName, subMatch.productions, 0) {
| indexList |
yield subMatch.mapIndexList(indexList)
}
}
end
matchRecursively(outp, "_value", matcher.productions) {
| indexList |
indexList.each {
| index |
production = matcher.productions[index]
outp.puts "{"
outp.puts "Value* #{production.varName} = _value;"
outp.puts "UNUSED_PARAM(#{production.varName});"
internalArguments = []
operandArguments = []
tryArguments = []
numScopes = 0
seen = {}
# FIXME: We want to be able to combine pattern matchers, like have the pattern match rule
# for Branch in the lowering patcher delegate to a matcher that can deduce the kind of
# comparison that we're doing. We could then reuse that latter matcher for Check and
# unfused compare. The key to making such a delegation work is to have the inner matcher
# (the comparison matcher in this case) keep cascading if it encounters a rule that
# produces something that the outer matcher cannot handle. It's not obvious that the
# comparison matcher would need this, but the address matcher probably will. For example,
# we may have a StoreAddLoad rule that delegates to the address matcher, and the address
# matcher may produce an address that the lowering matcher cannot use because while the
# Add form does accept addresses it may not accept the particular address that the
# address matcher produced. In that case, instead of giving up on the StoreAddLoad rule,
# we want the address matcher to just try a different address. The comparison matcher
# might want this just for better controlling when to pin variables. Currently, if it
# constructed some code, it would also pin the variables that it used. If the lowering
# matcher then decided to reject the code created by the comparison matcher, it would
# have a hard time "unpinning" those variables. But if we had some kind of delegation, we
# could have the pinning of the comparison matcher happen only if the lowering matcher
# accepted.
#
# This could all be accomplished by having tryBlah() return something, and have the
# matcher also take a functor argument that accepts what tryBlah() returns. So instead of
#
# if (tryBlah()) ok;
#
# We would have:
#
# if (functor(tryBlah()) ok;
#
# When lowering decided to delegate to the address or comparison matcher, it could supply
# a functor that decides whether the thing that the sub-matcher selected is indeed
# useful.
#
# In the near term, we probably won't need this. But we will definitely need it as we
# expand to efficiently support more platforms. On X86_64, any branching instruction is
# usable in any context, and any address should be usable in any context (and the only
# cases where it wouldn't be is if we have holes in the MacroAssembler).
#
# https://bugs.webkit.org/show_bug.cgi?id=150559
production.eachVariable {
| parentVarName, childName, childVarName, kind, index |
loadExpr = "#{parentVarName}->child(#{index})"
if seen[childVarName]
tmp = newVar
outp.puts "Value* #{tmp} = #{loadExpr};"
outp.puts "if (#{tmp} == #{childVarName}) {"
numScopes += 1
else
seen[childVarName] = true
outp.puts "Value* #{childVarName} = #{loadExpr};"
# FIXME: Consider getting rid of the $ prefix.
# https://bugs.webkit.org/show_bug.cgi?id=150527
if childName =~ /\A\$/
if childName =~ /\A\$([0-9]+)\Z/
outp.puts "if (#{childVarName}->isInt(#{$1})) {"
else
outp.puts "if (#{childVarName}->hasInt()) {"
end
numScopes += 1
end
internalArguments << childVarName if kind == :internal or kind == :anonInternal
operandArguments << childVarName if kind == :operand or kind == :anonOperand
tryArguments << childVarName if kind == :internal or kind == :operand
end
}
outp.puts "if (adaptor.acceptRoot(_value)"
unless internalArguments.empty?
outp.puts("&& adaptor.acceptInternals(" + internalArguments.join(", ") + ")")
end
unless operandArguments.empty?
outp.puts("&& adaptor.acceptOperands(" + operandArguments.join(", ") + ")")
end
outp.puts("&& adaptor.try#{production.name}(" + tryArguments.join(", ") + ")) {")
outp.puts "adaptor.acceptRootLate(_value);"
unless internalArguments.empty?
outp.puts "adaptor.acceptInternalsLate(" + internalArguments.join(", ") + ");"
end
unless operandArguments.empty?
outp.puts "adaptor.acceptOperandsLate(" + operandArguments.join(", ") + ");"
end
outp.puts "return true;"
outp.puts "}"
numScopes.times {
outp.puts "}"
}
outp.puts "}"
}
}
outp.puts " return false;"
outp.puts "}"
outp.puts "} } // namespace JSC::B3"
outp.puts "#endif // B3#{matcher.name}_h"
}