blob: 34e4666df0855684ee639754a1b5edeb68531cf5 [file] [log] [blame]
# encoding: ASCII-8BIT
# iExploder - Generates bad HTML files to perform QA for web browsers.
#
# Copyright 2010 Thomas Stromberg - All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
require 'cgi'
require 'yaml'
require './scanner.rb'
require './version.rb'
# Per-test-number cache of generated tags, used to speed up subtest generation.
$TEST_CACHE = {}

# Maps a media file extension to the MIME type used for it (not that we
# always listen).
# NOTE(review): 'xpm' is mapped to the type conventionally used for PPM
# files - confirm whether that is deliberate.
$MIME_MAP = {
  # images
  'bmp'  => 'image/bmp',
  'gif'  => 'image/gif',
  'jpg'  => 'image/jpeg',
  'png'  => 'image/png',
  'svg'  => 'image/svg+xml',
  'tiff' => 'image/tiff',
  'xbm'  => 'image/xbm',
  'ico'  => 'image/x-icon',
  'jng'  => 'image/x-jng',
  'xpm'  => 'image/x-portable-pixmap',
  # audio
  'ogg'  => 'audio/ogg',
  'snd'  => 'audio/basic',
  'wav'  => 'audio/wav'
}

# Tags that are given a src attribute more often than the others.
$SRC_TAGS = %w[img audio video embed]
class IExploder
# Request state that callers set before generating output: the current test
# number, the raw subtest string, the lookup / random mode flags, the CGI base
# URL and the detected and claimed browser strings.
attr_accessor :test_num, :subtest_data, :lookup_mode, :random_mode, :cgi_url, :browser, :claimed_browser
# stop_num and config are read by the methods below; offset and lines are
# exposed here but not referenced by the code visible in this file.
attr_accessor :offset, :lines, :stop_num, :config
# Creates a generator configured from a YAML file and loads the mangle data.
#
# config_path - path of the YAML configuration file. readTagFiles reads its
#               'mangle_data_path' entry to locate the data directories.
def initialize(config_path)
  # The block form closes the config file as soon as it has been parsed
  # instead of leaving the handle open until garbage collection.
  @config = File.open(config_path) { |config_file| YAML::load(config_file) }
  @stop_num = nil
  @subtest_data = nil
  # Read by buildPage / nextTestNum / generateTestUrl; start them out
  # explicitly unset rather than relying on uninitialized ivars.
  @lookup_mode = nil
  @random_mode = nil
  @test_num = 0
  @cgi_url = '/iexploder.cgi'
  @browser = 'UNKNOWN'
  @claimed_browser = nil
  readTagFiles()
  nil
end
# Seeds Ruby's random number generator. A positive @test_num is used as the
# seed, making the random sequence repeatable for that test number; otherwise
# srand is left to choose its own seed.
# Returns whatever srand returns (the previous seed).
def setRandomSeed
  return srand unless @test_num > 0

  srand(@test_num)
end
# Loads every mangle data set found below the directory named by the
# 'mangle_data_path' config entry and stores each one in its instance variable.
# Returns the media hash produced by readMediaDir.
def readTagFiles
  root = @config['mangle_data_path']
  # Instance variable => sub-directory holding its tag files.
  tag_sources = [
    [:@cssTags,        'css-properties'],
    [:@cssPseudoTags,  'css-pseudo'],
    [:@cssAtRules,     'css-atrules'],
    [:@htmlTags,       'html-tags'],
    [:@htmlAttr,       'html-attrs'],
    [:@htmlValues,     'html-values'],
    [:@cssValues,      'css-values'],
    [:@headerValues,   'headers'],
    [:@protocolValues, 'protocols'],
    [:@mimeTypes,      'mime-types']
  ]
  tag_sources.each do |ivar, subdir|
    instance_variable_set(ivar, readTagsDir("#{root}/#{subdir}"))
  end
  @media = readMediaDir("#{root}/media")
end
# Reads every regular file directly inside +directory+ with readTagFile and
# returns the combined entries with duplicates removed. Sub-directories are
# skipped.
def readTagsDir(directory)
  collected = []
  Dir.foreach(directory) do |name|
    path = "#{directory}/#{name}"
    next unless File.file?(path)
    collected.concat(readTagFile(path))
  end
  collected.uniq
end
# Loads the sample media files found directly inside +directory+.
#
# Returns a hash mapping MIME type (looked up in $MIME_MAP by file extension)
# to the raw bytes of the file. When several files share a MIME type the one
# listed last by the directory wins.
def readMediaDir(directory)
  media = {}
  Dir.foreach(directory) do |filename|
    path = File.join(directory, filename)
    next unless File.file?(path)
    # Use the real (last) extension so names such as "sample.v2.png" resolve
    # to png; matching is case-insensitive.
    extension = File.extname(filename).sub(/\A\./, '').downcase
    mime_type = $MIME_MAP[extension]
    # Files with an unknown extension would otherwise be stored under a nil
    # key and later be picked as a "media type" by generateMediaUrl.
    next unless mime_type
    # Media is binary data: read it untranslated.
    media[mime_type] = File.open(path, 'rb') { |file| file.read }
  end
  media
end
# Reads a tag list file and returns its entries, one per line.
#
# Empty lines and comment lines (starting with "# ") are skipped.
def readTagFile(filename)
  entries = []
  # File.foreach closes the file when done (File.new(...).readlines left the
  # handle open).
  File.foreach(filename) do |line|
    # chomp only strips a line terminator, so a final line without a trailing
    # newline keeps its last character (chop! used to eat it).
    entry = line.chomp
    next if entry.empty? || entry =~ /^# /
    entries << entry
  end
  entries
end
# Produces a fuzzed value for the HTML attribute +tag+ (an optional
# 'EXCLUDED_' marker in the name is ignored when classifying it).
#
# Event handler attributes (on*) usually get a call expression and
# src/data/profile usually get a garbage URL; everything else is drawn from a
# weighted mix of known values, numbers, garbage, URLs and overflows.
def generateHtmlValue(tag)
  roll = rand(100)
  tag = tag.sub('EXCLUDED_', '')
  if roll < 90
    return generateHtmlValue('') + "()" if tag =~ /^on/
    return generateGarbageUrl(tag) if ['src', 'data', 'profile'].include?(tag)
  end

  if roll <= 50
    @htmlValues[rand(@htmlValues.length)]
  elsif roll <= 75
    generateGarbageNumber()
  elsif roll <= 85
    generateGarbageValue()
  elsif roll <= 90
    generateGarbageNumber() + ',' + generateGarbageNumber()
  elsif roll <= 98
    generateGarbageUrl(tag)
  else
    generateOverflow()
  end
end
# Returns a test URL for the current test number that requests one of the
# loaded media types, chosen at random. +tag+ is accepted but not used.
def generateMediaUrl(tag)
  known_types = @media.keys
  chosen_type = known_types[rand(known_types.length)]
  generateTestUrl(@test_num, nil, nil, chosen_type)
end
# Produces a fuzzed URL for +tag+: a media test URL, a known protocol prefix
# followed by garbage, garbage/overflow joined by a colon, or a plain overflow.
#
# Roll ranges (first match wins): 0-30 media, 31-50 "proto%", 51-60
# "proto//../", 61-75 "proto//", 76-85 overflow:garbage, 86-97
# garbage:overflow, 98-99 overflow.
def generateGarbageUrl(tag)
  roll = rand(100)
  if roll <= 30
    generateMediaUrl(tag)
  elsif roll <= 75
    scheme = @protocolValues[rand(@protocolValues.length)]
    joiner = if roll <= 50 then '%' elsif roll <= 60 then '//../' else '//' end
    scheme + joiner + generateGarbageValue()
  elsif roll <= 85
    generateOverflow() + ":" + generateGarbageValue()
  elsif roll <= 97
    generateGarbageValue() + ":" + generateOverflow()
  else
    generateOverflow()
  end
end
# Produces a fuzzed value for the CSS +property+.
#
# About half of the time the value is shaped after the property name (url()
# for images/content, a length for sizes, a color, a time in ms, or a known
# CSS value); the remainder is a number, a url(), raw garbage or an overflow.
def generateCssValue(property)
  units = ['', 'em', 'px', '%', 'pt', 'pc', 'ex', 'in', 'cm', 'mm']
  roll = rand(100)
  if roll <= 50
    # the most likely scenario: match the value to the kind of property
    name = property.sub('EXCLUDED_', '')
    if name =~ /-image|content/
      'url(' + generateGarbageUrl(property) + ')'
    elsif name =~ /-width|-radius|-spacing|margin|padding|height/
      generateGarbageValue() + units[rand(units.length)]
    elsif name =~ /-color/
      generateGarbageColor()
    elsif name =~ /-delay|-duration/
      generateGarbageValue() + 'ms'
    else
      @cssValues[rand(@cssValues.length)]
    end
  elsif roll <= 75
    generateGarbageNumber()
  elsif roll <= 85
    'url(' + generateGarbageUrl(property) + ')'
  elsif roll <= 98
    generateGarbageValue()
  else
    generateOverflow()
  end
end
# Produces a fuzzed CSS color: '#' plus garbage, an rgb() triple of garbage
# numbers (plain or as percentages), or occasionally an overflow string.
def generateGarbageColor()
  roll = rand(100)
  return '#' + generateGarbageValue() if roll <= 50
  return generateOverflow() if roll > 98

  suffix = roll <= 70 ? '' : '%'
  components = Array.new(3) { generateGarbageNumber() + suffix }
  'rgb(' + components.join(',') + ')'
end
# Produces a string that looks like (or stands in for) a hostile number:
# zero, long runs of nines, huge positive/negative decimals, garbage text or
# an overflow string.
def generateGarbageNumber()
  roll = rand(100)
  if roll == 0
    '0'
  elsif roll <= 40
    '9' * rand(100)
  elsif roll <= 60
    "999999.#{rand(999999999999999999999)}"
  elsif roll <= 80
    '-' + ('9' * rand(100))
  elsif roll <= 90
    "-999999.#{rand(999999999999999999999)}"
  elsif roll <= 98
    generateGarbageText()
  else
    generateOverflow()
  end
end
# Produces a garbage string: a repeated random byte, a run of "%n", repeated
# numeric character references, repeated \x escapes, or repeated runs of
# characters picked from a fixed set. Repeat counts are bounded by the
# 'buffer_overflow_length' / 'max_garbage_text_size' config entries.
def generateGarbageValue()
  roll = rand(100)
  if roll <= 30
    rand(255).chr * rand(@config['buffer_overflow_length'])
  elsif roll <= 50
    "%n" * 50
  elsif roll <= 65
    entity = "&#" + rand(999999).to_s + ";"
    entity * rand(@config['max_garbage_text_size'])
  elsif roll <= 70
    # between 2 and 21 hex escapes
    escapes = Array.new(rand(20) + 2) { "\\x" + rand(65535).to_s(16) }
    escapes.join('') * rand(@config['max_garbage_text_size'])
  else
    alphabet = '%?!$#^0123456789ABCDEF%#./\&|;'
    # between 2 and 21 characters
    picked = Array.new(rand(20) + 2) { alphabet[rand(alphabet.length)].chr }
    picked.join('') * rand(@config['max_garbage_text_size'])
  end
end
# Returns one random byte repeated at least 'buffer_overflow_length' times
# (plus up to 499 more).
def generateOverflow()
  filler = rand(255).chr
  length = @config['buffer_overflow_length'] + rand(500)
  filler * length
end
# Produces filler text: usually 129 'X' characters, otherwise "%n" runs,
# repeated character references, a garbage value, a repeated random byte or an
# overflow string.
def generateGarbageText
  roll = rand(100)
  if roll <= 70
    'X' * 129
  elsif roll <= 75
    "%n" * 15
  elsif roll <= 85
    entity = "&#" + rand(9999999999999).to_s + ";"
    entity * rand(@config['max_garbage_text_size'])
  elsif roll <= 90
    generateGarbageValue()
  elsif roll <= 98
    rand(255).chr * rand(@config['max_garbage_text_size'])
  else
    generateOverflow()
  end
end
# Tells whether a tag/attribute combination is excluded for the current browser.
#
# properties - [tag, attribute] or [tag, 'style', css_property]
#
# The 'exclude' config hash is keyed by the dotted form of +properties+
# ("img.src") or by the same key with '*' in place of the tag ("*.src"); its
# values are regular expression sources matched against @browser.
# Returns true or false.
def isPropertyInBlacklist(properties)
  exclusions = @config.has_key?('exclude') && @config['exclude']
  return false unless exclusions

  exact_key = properties.join('.')
  wildcard_key = (['*'] + properties.drop(1)).join('.')
  [exact_key, wildcard_key].any? do |key|
    exclusions.has_key?(key) && @browser =~ /#{exclusions[key]}/
  end
end
# Builds a fuzzed inline style attribute (' style="..."') for +tag+.
#
# Each randomly chosen CSS property gets one or more fuzzed values; properties
# blacklisted for this tag are renamed with an EXCLUDED_ prefix. The ':' and
# ';' separators are occasionally left out on purpose.
def generateCssStyling(tag)
  styling = ' style="'
  0.upto(rand(@config['properties_per_style_max'])) do
    name = @cssTags[rand(@cssTags.length)]
    name = "EXCLUDED_#{name}" if isPropertyInBlacklist([tag, 'style', name])
    styling << name
    # very small chance we let the property name run on.
    styling << ": " if rand(65) > 1
    value_list = 0.upto(rand(@config['attributes_per_style_property_max'])).map {
      generateCssValue(name)
    }
    styling << value_list.join(' ')
    # we almost always put the ; there.
    styling << ";\n " if rand(65) > 1
  end
  styling << "\""
end
# Builds a fuzzed opening tag for +tag+ with random attributes and, most of
# the time, an inline style.
#
# tag             - element name
# no_close_chance - when false (default) there is a 15% chance that only a
#                   closing tag ("</tag>") is returned instead
#
# Attributes blacklisted for the current browser are emitted with an
# EXCLUDED_ prefix. Returns the markup as a string ending in ">\n".
def mangleTag(tag, no_close_chance=false)
  if !no_close_chance && rand(100) < 15
    return "</" + tag + ">"
  end

  out = "<" + tag
  # Nearly always a space after the tag name; rarely an overflow instead.
  out << (rand(100) > 1 ? ' ' : generateOverflow())

  attr_count = rand(@config['attributes_per_html_tag_max']) + 1
  attrs = []

  # The HTML head tag does not have many useful attributes, but is always included in tests.
  if tag == 'head' && rand(100) < 75
    attrs << ['lang', 'dir', 'profile'][rand(3)]
  end

  # 75% of the time, these tags get a src attribute
  if $SRC_TAGS.include?(tag) && rand(100) < 75
    # Go through the shared blacklist check so that src is excluded only for
    # browsers matching the configured pattern, and "*.src" wildcards are
    # honored - the same rules every other attribute follows below.
    attrs << (isPropertyInBlacklist([tag, 'src']) ? 'EXCLUDED_src' : 'src')
  end

  # Fill up with random attributes.
  while attrs.length < attr_count
    attribute = @htmlAttr[rand(@htmlAttr.length)]
    attribute = "EXCLUDED_#{attribute}" if isPropertyInBlacklist([tag, attribute])
    attrs << attribute
  end

  attrs.each do |attr|
    out << attr
    out << '=' if rand(100) > 1
    # Half of the values are quoted; one in ten of those is left unterminated.
    quoted = rand(100) >= 50
    out << "\"" if quoted
    out << generateHtmlValue(attr)
    out << "\"" if quoted && rand(100) >= 10
    out << "\n " if rand(100) >= 1
  end

  out << generateCssStyling(tag) if rand(100) >= 25
  out << ">\n"
  out
end
# Returns the number of the test to move on to: the current one while a
# subtest is in progress, a random one in random mode, otherwise the next one.
def nextTestNum()
  return @test_num if @subtest_data
  return rand(99999999999) if @random_mode

  @test_num + 1
end
# Generates a fuzzed CSS selector pattern.
#
# Starts from a random tag, '*', an at-rule or nothing, then randomly appends
# combinators, a pseudo class (optionally with a parenthesised garbage
# argument), an attribute matcher, a class and ids.
def generateCssPattern()
  roll = rand(100)
  selector =
    if roll <= 84
      @htmlTags[rand(@htmlTags.length)].dup
    elsif roll <= 89
      "*"
    elsif roll <= 94
      @cssAtRules[rand(@cssAtRules.length)].dup
    else
      ''
    end

  # descendant, child and adjacent-sibling combinators, each with a 25% chance
  [" ", " > ", " + "].each do |combinator|
    selector << combinator + @htmlTags[rand(@htmlTags.length)] if rand(100) < 25
  end
  selector << "*" if rand(100) < 10

  if rand(100) < 25
    pseudo = @cssPseudoTags[rand(@cssPseudoTags.length)].dup
    # These tags typically have a parenthesis
    typical_paren = pseudo =~ /^lang|^nth|^not/ && rand(100) < 75 && pseudo !~ /\(/
    pseudo << '(' if typical_paren || rand(100) < 20
    if pseudo =~ /\(/
      pseudo << generateGarbageValue() if rand(100) < 75
      pseudo << ')' if rand(100) < 75
    end
    selector << ":" + pseudo
  end

  if rand(100) < 20
    attr_name = @htmlAttr[rand(@htmlAttr.length)]
    matcher = '[' + attr_name
    operator_roll = rand(100)
    garbage = generateGarbageValue()
    matcher <<
      if operator_roll <= 25
        ']'
      elsif operator_roll <= 50
        "=\"#{garbage}\"]"
      elsif operator_roll <= 75
        "=~\"#{garbage}\"]"
      else
        "|=\"#{garbage}\"]"
      end
    selector << matcher
  end

  if rand(100) < 20
    selector << (rand(100) < 50 ? '.' + generateGarbageValue() : '.*')
  end
  selector << '#' + generateGarbageValue() if rand(100) < 20
  selector << ' #' + generateGarbageValue() if rand(100) < 5
  selector
end
# Builds the body of a fuzzed <style> element: one or more selector patterns,
# each followed by a (usually brace-delimited) list of fuzzed declarations.
# Blacklisted properties are renamed with an EXCLUDED_ prefix.
def buildStyleTag()
  css = "\n"
  0.upto(rand(@config['properties_per_style_max'])) do
    css << generateCssPattern()
    css << " {\n" if rand(100) < 90
    0.upto(rand(@config['properties_per_style_max'])) do
      name = @cssTags[rand(@cssTags.length)].dup
      name = " EXCLUDED_#{name}" if isPropertyInBlacklist(['style', 'style', name])
      css << " #{name}: "
      value_list = 0.upto(rand(@config['attributes_per_style_property_max'])).map {
        generateCssValue(name)
      }
      css << value_list.join(' ')
      css << ";\n" if rand(100) < 95
    end
    css << "\n}\n" if rand(100) < 90
  end
  css
end
# Build any malicious javascript here. Fairly naive at the moment.
#
# The generated onload handler creates a random element, assigns fuzzed style
# and attribute values to it, appends it to the body and then document.write()s
# two mangled tags. Returns the script source as a string.
def buildJavaScript
  element = @htmlTags[rand(@htmlTags.length)]
  first_style = @cssTags[rand(@cssTags.length)]
  second_style = @cssTags[rand(@cssTags.length)]
  attr_name = @htmlAttr[rand(@htmlAttr.length)]
  style_value = generateCssValue(first_style)
  attr_value = generateHtmlValue(attr_name)
  raw_number = generateGarbageNumber()
  first_write = mangleTag(@htmlTags[rand(@htmlTags.length)])
  second_write = mangleTag(@htmlTags[rand(@htmlTags.length)])
  [
    "window.onload=function(){",
    " var ietarget = document.createElement('#{element}');",
    " ietarget.style.#{first_style} = '#{style_value}';",
    " ietarget.#{attr_name} = '#{attr_value}';",
    " document.body.appendChild(ietarget);",
    " ietarget.style.#{second_style} = #{raw_number};",
    " document.write('#{first_write}');",
    " document.write('#{second_write}');",
    "}"
  ].join("\n")
end
# Returns a deliberately corrupted copy of the sample media for +mime_type+.
#
# When no sample was loaded for the type, a notice is printed and garbage text
# is used as the starting data. A run of garbage bytes overwrites the data at
# a random position, and 15% of the time more garbage is appended. The cached
# sample in @media is never modified.
def buildMediaFile(mime_type)
  if @media.has_key?(mime_type)
    data = @media[mime_type].dup
  else
    puts "No media found for #{mime_type}"
    data = generateGarbageText()
  end

  # corrupt it in a subtle way
  garbage = rand(100) > 50 ? generateGarbageValue() : rand(255).chr * rand(8)

  # Treat both strings as raw bytes on Rubies that track encodings.
  if "1.9".respond_to?(:encoding)
    garbage.force_encoding('ASCII-8BIT')
    data.force_encoding('ASCII-8BIT')
  end

  # rand(0) yields a float for empty data; to_i turns that into offset 0.
  garbage_start = rand(data.length).to_i
  # Overwrite exactly garbage.length bytes. The previous inclusive range
  # (start..start+length) replaced one byte too many, so every corruption also
  # silently dropped a byte from the file.
  data[garbage_start, garbage.length] = garbage

  data << generateGarbageValue() if rand(100) < 15
  data
end
# Parse the subtest data passed in as part of the URL
def parseSubTestData(subtest_data)
# Initialize with one line at 0
if not subtest_data or subtest_data.to_i == 0
return [@config['initial_subtest_width'], [0]]
end
(lines_at_time, offsets_string) = subtest_data.split('_')
offsets = offsets_string.split(',').map! {|x| x.to_i }
return [lines_at_time.to_i, offsets]
end
# Builds the CGI URL for a test page or media file.
#
# test_num        - test number (t=)
# subtest_width   - when given, subtest parameters are added (s=width_offsets);
#                   if there are more offsets than 'subtest_combinations_max'
#                   the URL switches to the test_redirect lookup instead
# subtest_offsets - array of offsets, used together with subtest_width
# mime_type       - when given, added escaped as m=
#
# Random mode (r=1) or the stop number (x=) and the escaped browser (b=) are
# always appended from instance state.
def generateTestUrl(test_num, subtest_width=nil, subtest_offsets=nil, mime_type=nil)
  query = "t=#{test_num}"
  if subtest_width
    if subtest_offsets.length > @config['subtest_combinations_max']
      query << "&l=test_redirect&z=THE_END"
    else
      query << "&s=#{subtest_width}_#{subtest_offsets.join(',')}"
    end
  end

  if @random_mode
    query << "&r=1"
  elsif @stop_num
    query << "&x=#{@stop_num}"
  end

  query << '&m=' + CGI::escape(mime_type) if mime_type
  query << "&b=" + CGI::escape(@browser)
  @cgi_url + '?' + query
end
# Builds the mangled body section: a 'body' tag followed by tag_count-1 random
# tags, with the tags listed in the 'favor_html_tags' config entry
# (tag => percent chance) randomly substituted in.
#
# script and style tags get generated JavaScript / CSS as content, other tags
# usually get garbage text; about a third of the tags are closed. Returns an
# array with one commented markup chunk per tag.
def buildBodyTags(tag_count)
  tags = ['body']
  # subtract the <body> tag from tag_count.
  1.upto(tag_count-1) { tags << @htmlTags[rand(@htmlTags.length)] }

  # Lean ourselves toward lots of img and src tests
  @config['favor_html_tags'].each do |favored, percent|
    next unless rand(100) < percent.to_f
    # Don't overwrite the body tag.
    tags[rand(tags.length-1)+1] = favored
  end

  # Now we have our hitlist of tags, lets mangle them.
  tags.map do |tag|
    markup = mangleTag(tag)
    case tag
    when 'script'
      markup = "<script>" if rand(100) < 40
      markup << buildJavaScript() + "\n" + "</script>\n"
    when 'style'
      markup = "<style>" if rand(100) < 40
      markup << buildStyleTag() + "\n" + "</style>\n"
    else
      markup << generateGarbageText() if rand(100) <= 90
      markup << "\n"
    end
    markup << "</#{tag}>\n" if rand(100) <= 33
    "\n<!-- START #{tag} -->\n" + markup + "\n<!-- END #{tag} -->\n"
  end
end
# Builds the mangled header section: 'html' and 'head', then tag_count-1
# random head tags (title/base/link/meta) and one random tag of any kind.
# The tags are mangled without the chance of becoming a bare closing tag.
# Returns an array of markup strings.
def buildHeaderTags(tag_count)
  head_choices = ['title', 'base', 'link', 'meta']
  names = ['html', 'head']
  1.upto(tag_count-1) { names << head_choices[rand(head_choices.length)] }
  names << @htmlTags[rand(@htmlTags.length)]
  names.map { |name| mangleTag(name, true) }
end
# Returns the static page shown once the browser has made it through both
# redirect tests. +page_type+ is accepted but not used.
def buildSurvivedPage(page_type)
  [
    "<html><head>",
    "<body>Bummer. You survived both redirects. Let me go sulk in the corner.</body>",
    "</html>"
  ].join
end
# Builds the markup (meta refresh plus a JavaScript fallback) that sends the
# browser on to the next page.
#
# test_num     - number of the test being served
# subtest_data - raw subtest string (see parseSubTestData) or nil
# lookup_mode  - nil for a normal run; '1' suppresses the redirect;
#                'test_redirect' and 'test_another_redirect' advance to the
#                next step of the redirect check; any other value is passed on
# stop_num     - test number at which redirecting stops (optional)
#
# Returns the redirect markup, or '' when no redirect should be emitted.
def buildRedirect(test_num, subtest_data, lookup_mode, stop_num=nil)
  # no more redirects.
  return '' if lookup_mode == '1' || stop_num == test_num

  width = offsets = nil
  # Parse the argument that was passed in (not @subtest_data), so the
  # redirect always reflects the subtest the caller asked about.
  width, offsets = parseSubTestData(subtest_data) if subtest_data

  if lookup_mode
    # We still need a redirect, but don't bother generating new data.
    redirect_url = generateTestUrl(test_num, width, offsets)
    next_mode =
      case lookup_mode
      when 'test_redirect' then 'test_another_redirect'
      when 'test_another_redirect' then 'survived_redirect'
      else lookup_mode
      end
    redirect_url << "&l=#{next_mode}"
  else
    # This is a normal redirect going on to the next page. If we have subtest, get the next one.
    if subtest_data
      width, offsets = combine_combo_creator(@config['html_tags_per_page'], width, offsets)[0..1]
    end
    redirect_url = generateTestUrl(nextTestNum(), width, offsets)
  end

  redirect_code = "\t<META HTTP-EQUIV=\"Refresh\" content=\"0;URL=#{redirect_url}\">\n"
  # use both techniques, because you never know how you might be corrupting yourself.
  redirect_code << "\t<script language=\"javascript\">setTimeout('window.location=\"#{redirect_url}\"', 1000);</script>\n"
  redirect_code
end
# Builds the complete HTML document for the current test (@test_num) and
# returns it as a string.
#
# In 'survived_redirect' lookup mode the static page from buildSurvivedPage is
# returned instead. When @subtest_data is non-empty, only the tag lines
# selected by combine_combo_creator (not defined in this file) are kept, plus
# bare <html>/<head>/<body> placeholders for required lines that were left out.
def buildPage()
if @lookup_mode == 'survived_redirect'
return self.buildSurvivedPage(@lookup_mode)
end
tag_count = @config['html_tags_per_page']
# Reuse the tags cached for this test number; the cache is only filled in
# subtest mode (see below).
if $TEST_CACHE.has_key?(@test_num)
(header_tags, body_tags) = $TEST_CACHE[@test_num]
else
header_tags = buildHeaderTags(3)
body_tags = buildBodyTags(tag_count - header_tags.length)
end
# Line number => tag that must be present even if the subtest skips that line.
required_tags = {
0 => 'html',
1 => 'head',
header_tags.length => 'body'
}
if @subtest_data and @subtest_data.length > 0
# Remember the full tag set so later subtests of this test number see the
# same tags.
if not $TEST_CACHE.has_key?(@test_num)
$TEST_CACHE[@test_num] = [header_tags, body_tags]
end
(width, offsets) = parseSubTestData(@subtest_data)
# Third element of the result is used as the collection of line numbers to keep.
lines = combine_combo_creator(tag_count, width, offsets)[2]
all_tags = header_tags + body_tags
body_start = header_tags.length
# Rebuild both lists from scratch with only the selected / required lines.
header_tags = []
body_tags = []
# <html> and <body> are required, regardless of their existence in the subtest data.
0.upto(tag_count) do |line_number|
tag_data = nil
if lines.include?(line_number)
tag_data = all_tags[line_number]
elsif required_tags.key?(line_number)
tag_data = "<" + required_tags[line_number] + ">"
end
# Lines that are neither selected nor required (or have no tag) are dropped.
if tag_data
if line_number < body_start
header_tags << tag_data
else
body_tags << tag_data
end
end
end
header_tags << "<!-- subtest mode: #{offsets.length} combinations, width: #{width} -->"
end
# The first two header tags open the document; the redirect and title are
# emitted right after them, ahead of the remaining header tags.
htmlText = header_tags[0..1].join("\n\t")
htmlText << buildRedirect(@test_num, @subtest_data, @lookup_mode, @stop_num)
htmlText << "<title>[#{@test_num}:#{@subtest_data}] iExploder #{$VERSION} - #{generateGarbageText()}</title>\n"
# Prefer the browser string the client claimed, if there is one.
if @claimed_browser and @claimed_browser.length > 1
show_browser = @claimed_browser
else
show_browser = @browser
end
htmlText << "\n<!-- iExploder #{$VERSION} | test #{@test_num}:#{@subtest_data} at #{Time.now} -->\n"
htmlText << "<!-- browser: #{show_browser} -->\n"
htmlText << header_tags[2..-1].join("\n\t")
htmlText << "\n</head>\n\n"
htmlText << body_tags.join("\n")
htmlText << "</body>\n</html>"
return htmlText
end
# Builds the HTTP response headers for a generated resource.
#
# Returns a hash that always contains 'Content-Type' => mime_type plus a
# random selection of known header names, each mapped to a garbage number
# (about a quarter of the time) or a garbage URL.
def buildHeaders(mime_type)
  banned = []
  chosen = []
  0.upto(rand(@config['headers_per_page_max'])) do
    candidate = @headerValues[rand(@headerValues.length)]
    chosen << candidate unless banned.include?(candidate.downcase)
  end

  chosen.uniq.inject({'Content-Type' => mime_type}) do |response, header|
    response[header] = rand(100) > 75 ? generateGarbageNumber() : generateGarbageUrl(header)
    response
  end
end
end
# for testing
#
# Usage: <script> [test_num] [subtest_data] [mime_type]
# Without a mime type the generated page is printed; with one, the response
# headers and the corrupted media file for that type are printed instead.
if $0 == __FILE__
  ie = IExploder.new('config.yaml')
  # Apply the default before converting: ARGV[0].to_i is 0 when the argument
  # is missing, and 0 is truthy in Ruby, so "ARGV[0].to_i || 1" never gave 1.
  ie.test_num = (ARGV[0] || 1).to_i
  ie.subtest_data = ARGV[1]
  mime_type = ARGV[2]
  ie.setRandomSeed()
  if mime_type
    ie.buildHeaders(mime_type).each do |key, value|
      puts "#{key}: #{value}"
    end
    puts "Mime-Type: #{mime_type}"
    puts ie.buildMediaFile(mime_type)
  else
    puts ie.buildPage()
  end
end