blob: 82ba5650ec82ae301cfdbd3f37b03e642fa46324 [file] [log] [blame]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import re
import subprocess
import sys
from webkitpy.common.system.systemhost import SystemHost
sys.path.append(SystemHost().path_to_lldb_python_directory())
import lldb
# lldb Python reference:
# <https://lldb.llvm.org/python_reference/>
class AnsiColors:
    """ANSI terminal escape sequences used to colorize the layout dump."""

    # Wrapped around type names when colorized output is requested.
    BLUE = '\033[94m'
    # Wrapped around padding / unused-bit markers when colorized output is requested.
    WARNING = '\033[93m'
    # Emitted after a colored span to reset the terminal attributes.
    ENDCOLOR = '\033[0m'
class ClassLayoutBase(object):
    """Comparable, printable description of a class or struct layout.

    ``data_members`` is a list of dictionaries keyed by the ``MEMBER_*`` and
    ``PADDING_*`` constants below.  An entry that carries
    ``MEMBER_CLASS_INSTANCE`` describes a nested layout (another
    ClassLayoutBase) and is printed recursively.
    """

    MEMBER_NAME_KEY = 'name'
    MEMBER_TYPE_KEY = 'type'
    MEMBER_TYPE_CLASS = 'type_class'  # lldb.eTypeClassStruct etc. Values here: <https://lldb.llvm.org/python_reference/_lldb%27-module.html#eTypeClassAny>
    MEMBER_CLASS_INSTANCE = 'class_instance'
    MEMBER_BYTE_SIZE = 'byte_size'
    MEMBER_OFFSET = 'offset'  # offset is local to this class.
    MEMBER_OFFSET_IN_BITS = 'offset_in_bits'
    MEMBER_IS_BITFIELD = 'is_bitfield'
    MEMBER_BITFIELD_BIT_SIZE = 'bit_size'
    PADDING_BYTES_TYPE = 'padding'
    PADDING_BITS_TYPE = 'padding_bits'
    PADDING_BITS_SIZE = 'padding_bits_size'
    PADDING_NAME = ''

    def __init__(self, typename):
        """Create an empty layout (no members, zero size) for ``typename``."""
        self.typename = typename
        self.total_byte_size = 0
        self.total_pad_bytes = 0
        self.data_members = []

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        """Return True when ``other`` has the same sizes and the same members.

        ``typename`` is intentionally not part of the comparison, matching the
        original behavior.
        """
        if not isinstance(other, self.__class__):
            return False
        if self.total_byte_size != other.total_byte_size:
            return False
        if self.total_pad_bytes != other.total_pad_bytes:
            return False
        # The original compared len(self.data_members) against the list itself,
        # which is never equal, so no two layouts ever compared equal.  List
        # equality checks both the length and every member dictionary.
        return self.data_members == other.data_members

    def _to_string_recursive(self, str_list, colorize, member_name=None, depth=0, total_offset=0):
        """Append one line for this layout, then one line per member, to ``str_list``.

        ``member_name`` is the name under which this layout is embedded in its
        parent (None for the outermost layout), ``depth`` controls the
        indentation and ``total_offset`` is the offset of this layout from the
        start of the outermost one.
        """
        type_start = AnsiColors.BLUE if colorize else ''
        warn_start = AnsiColors.WARNING if colorize else ''
        color_end = AnsiColors.ENDCOLOR if colorize else ''
        indent = ' ' * depth

        if member_name:
            str_list.append('%+4u <%3u> %s%s%s%s %s' % (total_offset, self.total_byte_size, indent, type_start, self.typename, color_end, member_name))
        else:
            str_list.append('%+4u <%3u> %s%s%s%s' % (total_offset, self.total_byte_size, indent, type_start, self.typename, color_end))

        for data_member in self.data_members:
            # Member offsets are local to this class; rebase them on the outermost layout.
            member_total_offset = total_offset + data_member[self.MEMBER_OFFSET]

            if self.MEMBER_CLASS_INSTANCE in data_member:
                data_member[self.MEMBER_CLASS_INSTANCE]._to_string_recursive(str_list, colorize, data_member[self.MEMBER_NAME_KEY], depth + 1, member_total_offset)
                continue

            byte_size = data_member[self.MEMBER_BYTE_SIZE]
            member_type = data_member[self.MEMBER_TYPE_KEY]

            if self.MEMBER_IS_BITFIELD in data_member:
                num_bits = data_member[self.MEMBER_BITFIELD_BIT_SIZE]
                str_list.append('%+4u < :%1u> %s %s %s : %d' % (member_total_offset, num_bits, indent, member_type, data_member[self.MEMBER_NAME_KEY], num_bits))
            elif member_type == self.PADDING_BYTES_TYPE:
                str_list.append('%+4u <%3u> %s %s<PADDING: %d %s>%s' % (member_total_offset, byte_size, indent, warn_start, byte_size, 'bytes' if byte_size > 1 else 'byte', color_end))
            elif member_type == self.PADDING_BITS_TYPE:
                padding_bits = data_member[self.PADDING_BITS_SIZE]
                str_list.append('%+4u < :%1u> %s %s<UNUSED BITS: %d %s>%s' % (member_total_offset, padding_bits, indent, warn_start, padding_bits, 'bits' if padding_bits > 1 else 'bit', color_end))
            else:
                str_list.append('%+4u <%3u> %s %s %s' % (member_total_offset, byte_size, indent, member_type, data_member[self.MEMBER_NAME_KEY]))

    def as_string_list(self, colorize=False):
        """Return the layout as a list of lines, followed by size and padding totals."""
        str_list = []
        self._to_string_recursive(str_list, colorize)
        str_list.append('Total byte size: %d' % (self.total_byte_size))
        str_list.append('Total pad bytes: %d' % (self.total_pad_bytes))
        # Also require a non-zero total size so the percentage can never divide by zero.
        if self.total_pad_bytes > 0 and self.total_byte_size > 0:
            str_list.append('Padding percentage: %2.2f %%' % ((float(self.total_pad_bytes) / float(self.total_byte_size)) * 100.0))
        return str_list

    def as_string(self, colorize=False):
        """Return the lines of as_string_list() joined with newlines."""
        return '\n'.join(self.as_string_list(colorize))

    def dump(self, colorize=True):
        """Print the layout to stdout (colorized by default)."""
        print(self.as_string(colorize))
class ClassLayout(ClassLayoutBase):
    """Stores the layout of a class or struct.

    The layout is read from the lldb objects passed to the constructor: base
    classes, fields and virtual base classes are recorded in ``data_members``
    and, for an outermost layout, padding entries are then inserted.
    """

    def __init__(self, target, type, containerClass=None, derivedClass=None):
        """Parse ``type`` and, for an outermost layout, compute its padding.

        target: the lldb target; used for the pointer size and passed on to nested layouts.
        type: the lldb type to describe.
        containerClass: the ClassLayout that holds this one as a field (or as a
            virtual-base dictionary entry), if any.
        derivedClass: the ClassLayout that has this one as a base class, if any.

        Padding is only computed when both containerClass and derivedClass are
        None; the recursive walk covers nested layouts from there.
        """
        super(ClassLayout, self).__init__(type.GetName())
        self.target = target
        self.type = type
        self.total_byte_size = self.type.GetByteSize()
        self.pointer_size = self.target.GetAddressByteSize()
        self.total_pad_bytes = 0
        self.data_members = []
        self.virtual_base_classes = self._virtual_base_classes_dictionary()
        self._parse(containerClass, derivedClass)
        # 'is None' rather than '== None': identity is what is meant, and it avoids calling the custom __eq__.
        if containerClass is None and derivedClass is None:
            self.total_pad_bytes = self._compute_padding()

    def _has_polymorphic_non_virtual_base_class(self):
        """Return True if any direct, non-virtual base class is polymorphic."""
        for i in range(self.type.GetNumberOfDirectBaseClasses()):
            base_class = self.type.GetDirectBaseClassAtIndex(i)
            if base_class.GetName() in self.virtual_base_classes:
                continue
            if base_class.GetType().IsPolymorphicClass():
                return True
        return False

    def _virtual_base_classes_dictionary(self):
        """Return a dictionary mapping each virtual base class name to a ClassLayout for it."""
        result = {}
        for i in range(self.type.GetNumberOfVirtualBaseClasses()):
            virtual_base = self.type.GetVirtualBaseClassAtIndex(i)
            result[virtual_base.GetName()] = ClassLayout(self.target, virtual_base.GetType(), self)
        return result

    def _describe_member(self, member):
        """Return (member_type, data_member) for a base class or field.

        data_member holds the entries shared by every kind of member: name,
        type name, canonical type class, byte size and class-local offset.
        """
        member_type = member.GetType()
        data_member = {
            self.MEMBER_NAME_KEY: member.GetName(),
            self.MEMBER_TYPE_KEY: member_type.GetName(),
            self.MEMBER_TYPE_CLASS: member_type.GetCanonicalType().GetTypeClass(),
            self.MEMBER_BYTE_SIZE: member_type.GetByteSize(),
            self.MEMBER_OFFSET: member.GetOffsetInBytes(),
        }
        return member_type, data_member

    def _parse(self, containerClass=None, derivedClass=None):
        """Populate data_members: vtable pointer, direct bases, fields, then virtual bases."""
        # It's moot where we actually show the vtable pointer, but to match clang -fdump-record-layouts, assign it to the
        # base-most polymorphic class (unless virtual inheritance is involved).
        if self.type.IsPolymorphicClass() and not self._has_polymorphic_non_virtual_base_class():
            self.data_members.append({
                self.MEMBER_NAME_KEY: '__vtbl_ptr_type * _vptr',
                self.MEMBER_TYPE_KEY: '',
                self.MEMBER_BYTE_SIZE: self.pointer_size,
                self.MEMBER_OFFSET: 0,
            })

        for i in range(self.type.GetNumberOfDirectBaseClasses()):
            direct_base = self.type.GetDirectBaseClassAtIndex(i)
            # virtual base classes are also considered direct base classes, but we need to skip those here.
            if direct_base.GetName() in self.virtual_base_classes:
                continue
            member_type, data_member = self._describe_member(direct_base)
            data_member[self.MEMBER_CLASS_INSTANCE] = ClassLayout(self.target, member_type, None, self)
            self.data_members.append(data_member)

        for i in range(self.type.GetNumberOfFields()):
            field = self.type.GetFieldAtIndex(i)
            member_type, data_member = self._describe_member(field)
            if field.IsBitfield():
                bit_size = field.GetBitfieldSizeInBits()
                data_member[self.MEMBER_IS_BITFIELD] = True
                data_member[self.MEMBER_BITFIELD_BIT_SIZE] = bit_size
                data_member[self.MEMBER_OFFSET_IN_BITS] = field.GetOffsetInBits()
                # For bitfields, member_byte_size was computed based on the field type without the bitfield modifiers, so compute from the number of bits.
                # Floor division keeps the size an integer under Python 3; '/' produced floats (e.g. 1.25) that skewed the padding arithmetic.
                data_member[self.MEMBER_BYTE_SIZE] = (bit_size + 7) // 8
            elif data_member[self.MEMBER_TYPE_CLASS] == lldb.eTypeClassStruct or data_member[self.MEMBER_TYPE_CLASS] == lldb.eTypeClassClass:
                data_member[self.MEMBER_CLASS_INSTANCE] = ClassLayout(self.target, member_type, self)
            self.data_members.append(data_member)

        # "For each distinct base class that is specified virtual, the most derived object contains only one base class subobject of that type,
        # even if the class appears many times in the inheritance hierarchy (as long as it is inherited virtual every time)."
        if derivedClass is None:
            for i in range(self.type.GetNumberOfVirtualBaseClasses()):
                virtual_base = self.type.GetVirtualBaseClassAtIndex(i)
                member_type, data_member = self._describe_member(virtual_base)
                data_member[self.MEMBER_CLASS_INSTANCE] = ClassLayout(self.target, member_type, None, self)
                self.data_members.append(data_member)

    # clang -fdump-record-layouts shows "(empty)" for such classes, but I can't find any way to access this information via lldb.
    def _probably_has_empty_base_class_optimization(self):
        """Heuristic: True when this layout is at most one byte and holds no non-empty member."""
        if self.total_byte_size > 1:
            return False

        if len(self.data_members) > 1:
            return False

        if len(self.data_members) == 1:
            data_member = self.data_members[0]
            if self.MEMBER_CLASS_INSTANCE in data_member:
                return data_member[self.MEMBER_CLASS_INSTANCE]._probably_has_empty_base_class_optimization()

        return True

    def _compute_padding_recursive(self, total_offset=0, depth=0, containerClass=None):
        """Insert padding entries into data_members and return [padding_bytes, end_offset].

        total_offset is the offset of this layout from the start of the
        outermost one; end_offset is the running offset after the last member.
        Trailing padding is only added for the outermost layout
        (containerClass is None).
        """
        padding_bytes = 0
        start_offset = total_offset
        current_offset = total_offset

        # data_members grows while it is walked (padding entries are inserted), hence the explicit index.
        i = 0
        while i < len(self.data_members):
            data_member = self.data_members[i]
            member_offset = data_member[self.MEMBER_OFFSET]

            probably_empty_base_class = False
            if self.MEMBER_CLASS_INSTANCE in data_member:
                probably_empty_base_class = member_offset == 0 and data_member[self.MEMBER_CLASS_INSTANCE]._probably_has_empty_base_class_optimization()

            byte_size = data_member[self.MEMBER_BYTE_SIZE]

            if not probably_empty_base_class:
                padding_size = start_offset + member_offset - current_offset

                if padding_size > 0:
                    padding_member = {
                        self.MEMBER_NAME_KEY: self.PADDING_NAME,
                        self.MEMBER_TYPE_KEY: self.PADDING_BYTES_TYPE,
                        self.MEMBER_BYTE_SIZE: padding_size,
                        self.MEMBER_OFFSET: current_offset - start_offset,
                    }
                    self.data_members.insert(i, padding_member)
                    padding_bytes += padding_size
                    # Step over the padding entry just inserted in front of data_member.
                    i += 1

                if self.MEMBER_IS_BITFIELD in data_member:
                    next_member_is_bitfield = False
                    if i < len(self.data_members) - 1:
                        next_data_member = self.data_members[i + 1]
                        next_member_is_bitfield = self.MEMBER_IS_BITFIELD in next_data_member

                    if not next_member_is_bitfield:
                        # Last bitfield of a run: report the bits left unused in its final byte.
                        end_bit_offset = data_member[self.MEMBER_OFFSET_IN_BITS] + data_member[self.MEMBER_BITFIELD_BIT_SIZE]
                        unused_bits = (8 - end_bit_offset) % 8
                        if unused_bits:
                            bit_padding_member = {
                                self.MEMBER_NAME_KEY: self.PADDING_NAME,
                                self.MEMBER_TYPE_KEY: self.PADDING_BITS_TYPE,
                                self.MEMBER_BYTE_SIZE: data_member[self.MEMBER_BYTE_SIZE],
                                self.PADDING_BITS_SIZE: unused_bits,
                                self.MEMBER_OFFSET: data_member[self.MEMBER_OFFSET],
                            }
                            self.data_members.insert(i + 1, bit_padding_member)
                            # Step over the unused-bits entry just inserted after data_member.
                            i += 1

                current_offset = start_offset + member_offset

            if self.MEMBER_CLASS_INSTANCE in data_member:
                [padding, offset] = data_member[self.MEMBER_CLASS_INSTANCE]._compute_padding_recursive(current_offset, depth + 1, self)
                padding_bytes += padding
                current_offset = offset
            else:
                current_offset += byte_size

            i += 1

        # Look for padding at the end.
        if containerClass is None:
            padding_size = self.total_byte_size - current_offset
            if padding_size > 0:
                padding_member = {
                    self.MEMBER_NAME_KEY: self.PADDING_NAME,
                    self.MEMBER_TYPE_KEY: self.PADDING_BYTES_TYPE,
                    self.MEMBER_BYTE_SIZE: padding_size,
                    self.MEMBER_OFFSET: current_offset - start_offset,
                }
                self.data_members.append(padding_member)
                padding_bytes += padding_size

        return [padding_bytes, current_offset]

    def _compute_padding(self):
        """Insert padding entries throughout the layout and return the total number of padding bytes."""
        [padding, offset] = self._compute_padding_recursive()
        return padding
class LLDBDebuggerInstance:
    """Wraps an instance of lldb.SBDebugger and vends ClassLayouts."""

    def __init__(self, binary_path, architecture):
        """Create a debugger and a target for ``binary_path``.

        binary_path: path of the binary to inspect.
        architecture: architecture name for the target; when falsy, the first
            architecture reported by the ``file`` command is used.

        Failures to create the target or to find its first module are reported
        on stdout; no exception is raised for them here.
        """
        self.binary_path = binary_path
        self.architecture = architecture
        self.debugger = lldb.SBDebugger.Create()
        self.debugger.SetAsync(False)

        architecture = self.architecture
        if not architecture:
            architecture = self._get_first_file_architecture()

        self.target = self.debugger.CreateTargetWithFileAndArch(str(self.binary_path), architecture)
        if not self.target:
            # str() keeps the message working for non-str path objects, matching the str() used when creating the target.
            print("Failed to make target for " + str(self.binary_path))

        self.module = self.target.GetModuleAtIndex(0)
        if not self.module:
            print("Failed to get first module in " + str(self.binary_path))

    def __del__(self):
        # 'debugger' is missing if __init__ failed before assigning it; don't raise from the finalizer in that case.
        debugger = getattr(self, 'debugger', None)
        if lldb and debugger is not None:
            lldb.SBDebugger.Destroy(debugger)

    def _get_first_file_architecture(self):
        """Return the first architecture listed by ``file`` for the binary.

        Falls back to lldb.LLDB_ARCH_DEFAULT when no output line matches.
        Raises subprocess.CalledProcessError if ``file`` exits with an error.
        """
        arch_pattern = re.compile(r'shared library +(\w+)$')
        file_output = subprocess.check_output(["file", str(self.binary_path)], encoding='UTF-8')
        for line in file_output.split('\n'):
            match = arch_pattern.search(line)
            if match:
                return match.group(1)
        return lldb.LLDB_ARCH_DEFAULT

    def layout_for_classname(self, classname):
        """Return a ClassLayout for ``classname``, or None (after printing an error) if no type matches."""
        types = self.module.FindTypes(classname)
        if types.GetSize():
            # There can be more than one type with a given name, but for now just return the first one.
            return ClassLayout(self.target, types.GetTypeAtIndex(0))

        print('error: no type matches "%s" in "%s"' % (classname, self.module.file))
        return None