; blob: edfe5c307ff28bf7a3220449428988cc35d2668c [file] [log] [blame]
;/*
; Copyright (C) 2014 Apple Inc. All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; 1. Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; 2. Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
; OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;*/
; Symbol called by getHostCallReturnValue below; it is defined outside this file.
EXTERN getHostCallReturnValueWithExecState : near
; Entry points exported from this file.
PUBLIC getHostCallReturnValue
PUBLIC ctiMasmProbeTrampoline
; Everything up to the matching "_TEXT ENDS" is placed in the _TEXT segment.
_TEXT SEGMENT
;-----------------------------------------------------------------------
; getHostCallReturnValue
;
; Thin forwarding stub around getHostCallReturnValueWithExecState.
;   Passes: rcx = (rsp on entry) - 8, as the first integer argument.
;   Result: whatever the callee leaves in rax is returned untouched
;           (nothing after the call writes rax).
;   NOTE(review): how the callee interprets the address in rcx is not
;   visible in this file - confirm against its definition.
;-----------------------------------------------------------------------
getHostCallReturnValue PROC
    ; Compute the argument first, while rsp still holds its entry value.
    lea     rcx, [rsp - 8]

    ; Reserve 32 bytes of home space for the four register parameters,
    ; plus 8 more bytes so that rsp is 16-byte aligned at the call.
    ; Without that alignment the CRT crashed on a floating point instruction.
    sub     rsp, 32 + 8
    call    getHostCallReturnValueWithExecState
    add     rsp, 32 + 8                 ; release the frame reserved above

    ret
getHostCallReturnValue ENDP
;-----------------------------------------------------------------------
; Byte offsets into the ProbeContext record used by ctiMasmProbeTrampoline.
; These values must match the x86_64 version in MacroAssemblerX86Common.cpp.
; The E-prefixed names (EAX, ECX, ...) label the slots in which the
; trampoline stores the full 64-bit registers (rax, rcx, ...).
;-----------------------------------------------------------------------
PTR_SIZE                            EQU 8

; Leading slots: function pointers and their arguments.
PROBE_PROBE_FUNCTION_OFFSET         EQU (0 * PTR_SIZE)
PROBE_ARG_OFFSET                    EQU (1 * PTR_SIZE)
PROBE_INIT_STACK_FUNCTION_OFFSET    EQU (2 * PTR_SIZE)
PROBE_INIT_STACK_ARG_OFFSET         EQU (3 * PTR_SIZE)

; General purpose registers: sixteen pointer-sized slots.
PROBE_FIRST_GPR_OFFSET              EQU (4 * PTR_SIZE)
PROBE_CPU_EAX_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 0 * PTR_SIZE)
PROBE_CPU_ECX_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 1 * PTR_SIZE)
PROBE_CPU_EDX_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 2 * PTR_SIZE)
PROBE_CPU_EBX_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 3 * PTR_SIZE)
PROBE_CPU_ESP_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 4 * PTR_SIZE)
PROBE_CPU_EBP_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 5 * PTR_SIZE)
PROBE_CPU_ESI_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 6 * PTR_SIZE)
PROBE_CPU_EDI_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 7 * PTR_SIZE)
PROBE_CPU_R8_OFFSET                 EQU (PROBE_FIRST_GPR_OFFSET + 8 * PTR_SIZE)
PROBE_CPU_R9_OFFSET                 EQU (PROBE_FIRST_GPR_OFFSET + 9 * PTR_SIZE)
PROBE_CPU_R10_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 10 * PTR_SIZE)
PROBE_CPU_R11_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 11 * PTR_SIZE)
PROBE_CPU_R12_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 12 * PTR_SIZE)
PROBE_CPU_R13_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 13 * PTR_SIZE)
PROBE_CPU_R14_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 14 * PTR_SIZE)
PROBE_CPU_R15_OFFSET                EQU (PROBE_FIRST_GPR_OFFSET + 15 * PTR_SIZE)

; Special purpose registers: instruction pointer and flags.
PROBE_FIRST_SPR_OFFSET              EQU (PROBE_FIRST_GPR_OFFSET + 16 * PTR_SIZE)
PROBE_CPU_EIP_OFFSET                EQU (PROBE_FIRST_SPR_OFFSET + 0 * PTR_SIZE)
PROBE_CPU_EFLAGS_OFFSET             EQU (PROBE_FIRST_SPR_OFFSET + 1 * PTR_SIZE)

; XMM registers: each slot is XMM_SIZE (8) bytes, i.e. 64 bits per register.
PROBE_FIRST_XMM_OFFSET              EQU (PROBE_FIRST_SPR_OFFSET + 2 * PTR_SIZE)
XMM_SIZE                            EQU 8
PROBE_CPU_XMM0_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 0 * XMM_SIZE)
PROBE_CPU_XMM1_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 1 * XMM_SIZE)
PROBE_CPU_XMM2_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 2 * XMM_SIZE)
PROBE_CPU_XMM3_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 3 * XMM_SIZE)
PROBE_CPU_XMM4_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 4 * XMM_SIZE)
PROBE_CPU_XMM5_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 5 * XMM_SIZE)
PROBE_CPU_XMM6_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 6 * XMM_SIZE)
PROBE_CPU_XMM7_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 7 * XMM_SIZE)
PROBE_CPU_XMM8_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 8 * XMM_SIZE)
PROBE_CPU_XMM9_OFFSET               EQU (PROBE_FIRST_XMM_OFFSET + 9 * XMM_SIZE)
PROBE_CPU_XMM10_OFFSET              EQU (PROBE_FIRST_XMM_OFFSET + 10 * XMM_SIZE)
PROBE_CPU_XMM11_OFFSET              EQU (PROBE_FIRST_XMM_OFFSET + 11 * XMM_SIZE)
PROBE_CPU_XMM12_OFFSET              EQU (PROBE_FIRST_XMM_OFFSET + 12 * XMM_SIZE)
PROBE_CPU_XMM13_OFFSET              EQU (PROBE_FIRST_XMM_OFFSET + 13 * XMM_SIZE)
PROBE_CPU_XMM14_OFFSET              EQU (PROBE_FIRST_XMM_OFFSET + 14 * XMM_SIZE)
PROBE_CPU_XMM15_OFFSET              EQU (PROBE_FIRST_XMM_OFFSET + 15 * XMM_SIZE)

; Total size of the ProbeContext record.
PROBE_SIZE                          EQU (PROBE_CPU_XMM15_OFFSET + XMM_SIZE)

; Stash the executeProbe function pointer at the end of the ProbeContext.
PROBE_EXECUTOR_OFFSET               EQU PROBE_SIZE

; Extra bytes the trampoline reserves on the stack past the ProbeContext.
OUT_SIZE                            EQU (5 * PTR_SIZE)
;-----------------------------------------------------------------------
; ctiMasmProbeTrampoline
;
; Captures the CPU state into a ProbeContext record on the stack, calls the
; executor function (passed in rcx) with a pointer to that record, optionally
; calls the initializeStack function stored in the record, and finally reloads
; every register - including rsp, rip and rflags - from the record. Whatever
; the called functions wrote into the record therefore becomes the CPU state
; after the final ret.
;
; Phases:
;   1. Allocate a 32-byte aligned ProbeContext and fill it in.
;   2. Call the executor with rcx = ProbeContext*.
;   3. If the requested rsp would overlap the ProbeContext, move the record
;      below the requested rsp.
;   4. Call the initializeStack function if its slot is non-zero.
;   5. Restore registers and return via a 5-word "restore area" placed just
;      below the requested rsp.
;
; Only the low 64 bits of each XMM register are saved and restored
; (movq with XMM_SIZE = 8); the upper bits are not preserved.
;-----------------------------------------------------------------------
ctiMasmProbeTrampoline PROC
; Save rflags first: the sub/and instructions below modify the flags.
pushfq
; MacroAssemblerX86Common::probe() has already generated code to store some values.
; Together with the rflags pushed above, the top of stack now looks like this:
; rsp[0 * ptrSize]: rflags
; rsp[1 * ptrSize]: return address / saved rip
; rsp[2 * ptrSize]: saved rbx
; rsp[3 * ptrSize]: saved rdx
; rsp[4 * ptrSize]: saved rcx
; rsp[5 * ptrSize]: saved rax
;
; Incoming registers contain:
; rcx: Probe::executeProbe
; rdx: probe function
; rbx: probe arg
; rax: scratch (was ctiMasmProbeTrampoline)
; rax = address of the six saved words listed above; used to read them back below.
mov rax, rsp
; Reserve the ProbeContext plus OUT_SIZE extra bytes (the first extra slot holds the executor pointer).
sub rsp, PROBE_SIZE + OUT_SIZE
; The X86_64 ABI specifies that the worst case stack alignment requirement is 32 bytes.
and rsp, not 01fh
; Since sp points to the ProbeContext, we've ensured that it's protected from interrupts before we initialize it.
; Store the caller's rbp before rbp is repurposed as the ProbeContext base pointer.
mov [PROBE_CPU_EBP_OFFSET + rsp], rbp
mov rbp, rsp ; Save the ProbeContext*.
; Record the incoming executor, probe function and probe argument.
mov [PROBE_EXECUTOR_OFFSET + rbp], rcx
mov [PROBE_PROBE_FUNCTION_OFFSET + rbp], rdx
mov [PROBE_ARG_OFFSET + rbp], rbx
; rsi and rdi still hold their original values, so store them directly.
mov [PROBE_CPU_ESI_OFFSET + rbp], rsi
mov [PROBE_CPU_EDI_OFFSET + rbp], rdi
; Copy the six words saved on the incoming stack (see layout above) into the
; ProbeContext, using rcx as a scratch register.
mov rcx, [0 * PTR_SIZE + rax]
mov [PROBE_CPU_EFLAGS_OFFSET + rbp], rcx
mov rcx, [1 * PTR_SIZE + rax]
mov [PROBE_CPU_EIP_OFFSET + rbp], rcx
mov rcx, [2 * PTR_SIZE + rax]
mov [PROBE_CPU_EBX_OFFSET + rbp], rcx
mov rcx, [3 * PTR_SIZE + rax]
mov [PROBE_CPU_EDX_OFFSET + rbp], rcx
mov rcx, [4 * PTR_SIZE + rax]
mov [PROBE_CPU_ECX_OFFSET + rbp], rcx
mov rcx, [5 * PTR_SIZE + rax]
mov [PROBE_CPU_EAX_OFFSET + rbp], rcx
; The recorded rsp is the address just above the six saved words.
mov rcx, rax
add rcx, 6 * PTR_SIZE
mov [PROBE_CPU_ESP_OFFSET + rbp], rcx
; r8-r15 have not been touched yet, so store them directly.
mov [PROBE_CPU_R8_OFFSET + rbp], r8
mov [PROBE_CPU_R9_OFFSET + rbp], r9
mov [PROBE_CPU_R10_OFFSET + rbp], r10
mov [PROBE_CPU_R11_OFFSET + rbp], r11
mov [PROBE_CPU_R12_OFFSET + rbp], r12
mov [PROBE_CPU_R13_OFFSET + rbp], r13
mov [PROBE_CPU_R14_OFFSET + rbp], r14
mov [PROBE_CPU_R15_OFFSET + rbp], r15
; Store the low 64 bits of each XMM register.
movq qword ptr [PROBE_CPU_XMM0_OFFSET + rbp], xmm0
movq qword ptr [PROBE_CPU_XMM1_OFFSET + rbp], xmm1
movq qword ptr [PROBE_CPU_XMM2_OFFSET + rbp], xmm2
movq qword ptr [PROBE_CPU_XMM3_OFFSET + rbp], xmm3
movq qword ptr [PROBE_CPU_XMM4_OFFSET + rbp], xmm4
movq qword ptr [PROBE_CPU_XMM5_OFFSET + rbp], xmm5
movq qword ptr [PROBE_CPU_XMM6_OFFSET + rbp], xmm6
movq qword ptr [PROBE_CPU_XMM7_OFFSET + rbp], xmm7
movq qword ptr [PROBE_CPU_XMM8_OFFSET + rbp], xmm8
movq qword ptr [PROBE_CPU_XMM9_OFFSET + rbp], xmm9
movq qword ptr [PROBE_CPU_XMM10_OFFSET + rbp], xmm10
movq qword ptr [PROBE_CPU_XMM11_OFFSET + rbp], xmm11
movq qword ptr [PROBE_CPU_XMM12_OFFSET + rbp], xmm12
movq qword ptr [PROBE_CPU_XMM13_OFFSET + rbp], xmm13
movq qword ptr [PROBE_CPU_XMM14_OFFSET + rbp], xmm14
movq qword ptr [PROBE_CPU_XMM15_OFFSET + rbp], xmm15
; Call the executor stashed at PROBE_EXECUTOR_OFFSET. rsp is 32-byte aligned
; here, and subtracting the 32-byte shadow space keeps it aligned at the call.
mov rcx, rbp ; the Probe::State* arg.
sub rsp, 32 ; shadow space
call qword ptr[PROBE_EXECUTOR_OFFSET + rbp]
add rsp, 32
; Make sure the ProbeContext is entirely below the result stack pointer so
; that register values are still preserved when we call the initializeStack
; function.
; rax = first address past the reserved area (rbp + PROBE_SIZE + OUT_SIZE);
; rdx = rsp value currently stored in the ProbeContext.
; NOTE(review): jge is a signed comparison of two addresses; it orders them
; correctly as long as both lie in the same half of the address space.
mov rcx, PROBE_SIZE + OUT_SIZE
mov rax, rbp
mov rdx, [PROBE_CPU_ESP_OFFSET + rbp]
add rax, rcx
cmp rdx, rax
jge ctiMasmProbeTrampolineProbeContextIsSafe
; Allocate a safe place on the stack below the result stack pointer to stash the ProbeContext.
; The new location (requested rsp - (PROBE_SIZE + OUT_SIZE), rounded down) is
; always below the current ProbeContext on this path, so the ascending copy
; below never overwrites source bytes it has not read yet.
sub rdx, rcx
and rdx, not 01fh ; Keep the stack pointer 32 bytes aligned.
xor rax, rax
mov rsp, rdx
mov rcx, PROBE_SIZE
; Copy the ProbeContext to the safe place.
; rax = byte offset copied so far, rcx = PROBE_SIZE; one pointer-sized word per
; iteration. Only PROBE_SIZE bytes are copied, so the executor slot at
; PROBE_EXECUTOR_OFFSET is not carried over (it is not used again below).
ctiMasmProbeTrampolineCopyLoop:
mov rdx, [rbp + rax]
mov [rsp + rax], rdx
add rax, PTR_SIZE
cmp rcx, rax
jg ctiMasmProbeTrampolineCopyLoop
; From here on rbp points at the relocated ProbeContext.
mov rbp, rsp
; Call initializeStackFunction if present.
ctiMasmProbeTrampolineProbeContextIsSafe:
; Load the function pointer into rcx; the add sets ZF when the pointer is zero.
xor rcx, rcx
add rcx, [PROBE_INIT_STACK_FUNCTION_OFFSET + rbp]
je ctiMasmProbeTrampolineRestoreRegisters
; Move the function pointer out of rcx so rcx can carry the argument.
mov rdx, rcx
mov rcx, rbp ; the Probe::State* arg.
sub rsp, 32 ; shadow space
call rdx
add rsp, 32
ctiMasmProbeTrampolineRestoreRegisters:
; To enable probes to modify register state, we copy all registers
; out of the ProbeContext before returning.
mov rdx, [PROBE_CPU_EDX_OFFSET + rbp]
mov rbx, [PROBE_CPU_EBX_OFFSET + rbp]
mov rsi, [PROBE_CPU_ESI_OFFSET + rbp]
mov rdi, [PROBE_CPU_EDI_OFFSET + rbp]
mov r8, [PROBE_CPU_R8_OFFSET + rbp]
mov r9, [PROBE_CPU_R9_OFFSET + rbp]
mov r10, [PROBE_CPU_R10_OFFSET + rbp]
mov r11, [PROBE_CPU_R11_OFFSET + rbp]
mov r12, [PROBE_CPU_R12_OFFSET + rbp]
mov r13, [PROBE_CPU_R13_OFFSET + rbp]
mov r14, [PROBE_CPU_R14_OFFSET + rbp]
mov r15, [PROBE_CPU_R15_OFFSET + rbp]
; Reload the low 64 bits of each XMM register from its slot.
movq xmm0, qword ptr[PROBE_CPU_XMM0_OFFSET + rbp]
movq xmm1, qword ptr[PROBE_CPU_XMM1_OFFSET + rbp]
movq xmm2, qword ptr[PROBE_CPU_XMM2_OFFSET + rbp]
movq xmm3, qword ptr[PROBE_CPU_XMM3_OFFSET + rbp]
movq xmm4, qword ptr[PROBE_CPU_XMM4_OFFSET + rbp]
movq xmm5, qword ptr[PROBE_CPU_XMM5_OFFSET + rbp]
movq xmm6, qword ptr[PROBE_CPU_XMM6_OFFSET + rbp]
movq xmm7, qword ptr[PROBE_CPU_XMM7_OFFSET + rbp]
movq xmm8, qword ptr[PROBE_CPU_XMM8_OFFSET + rbp]
movq xmm9, qword ptr[PROBE_CPU_XMM9_OFFSET + rbp]
movq xmm10, qword ptr[PROBE_CPU_XMM10_OFFSET + rbp]
movq xmm11, qword ptr[PROBE_CPU_XMM11_OFFSET + rbp]
movq xmm12, qword ptr[PROBE_CPU_XMM12_OFFSET + rbp]
movq xmm13, qword ptr[PROBE_CPU_XMM13_OFFSET + rbp]
movq xmm14, qword ptr[PROBE_CPU_XMM14_OFFSET + rbp]
movq xmm15, qword ptr[PROBE_CPU_XMM15_OFFSET + rbp]
; There are 6 more registers left to restore:
; rax, rcx, rbp, rsp, rip, and rflags.
; The restoration process at the end of this procedure works by popping
; 5 words off the stack into rflags, rax, rcx, rbp, and rip. These 5 words need
; to be pushed on top of the final rsp value so that just by popping the 5 words,
; we'll get the rsp that the probe wants to set. Let's call this area (for storing
; these 5 words) the restore area.
mov rcx, [PROBE_CPU_ESP_OFFSET + rbp]
sub rcx, 5 * PTR_SIZE
; rcx now points to the restore area.
; Copy remaining restore values from the ProbeContext to the restore area.
; Note: We already ensured above that the ProbeContext is in a safe location before
; calling the initializeStackFunction. The initializeStackFunction is not allowed to
; change the stack pointer again.
; rax is the scratch register for the copy; its final value comes from the pop below.
mov rax, [PROBE_CPU_EFLAGS_OFFSET + rbp]
mov [0 * PTR_SIZE + rcx], rax
mov rax, [PROBE_CPU_EAX_OFFSET + rbp]
mov [1 * PTR_SIZE + rcx], rax
mov rax, [PROBE_CPU_ECX_OFFSET + rbp]
mov [2 * PTR_SIZE + rcx], rax
mov rax, [PROBE_CPU_EBP_OFFSET + rbp]
mov [3 * PTR_SIZE + rcx], rax
mov rax, [PROBE_CPU_EIP_OFFSET + rbp]
mov [4 * PTR_SIZE + rcx], rax
; Switch rsp to the restore area; the ProbeContext is not read after this point.
mov rsp, rcx
; Do the remaining restoration by popping off the restore area.
; The pops must match the order the words were written above:
; rflags, rax, rcx, rbp, then rip (consumed by ret). After ret, rsp equals the
; value stored at PROBE_CPU_ESP_OFFSET.
popfq
pop rax
pop rcx
pop rbp
ret
ctiMasmProbeTrampoline ENDP
; Close the _TEXT segment opened near the top of the file.
_TEXT ENDS
; End of the assembly source.
END