diff options
Diffstat (limited to 'libffi/src/x86/unix64.S')
-rw-r--r-- | libffi/src/x86/unix64.S | 138 |
1 files changed, 137 insertions, 1 deletions
/* libffi/src/x86/unix64.S -- text of the file after hunk
   @@ -162,5 +162,141 @@ (index 2e64b4195bf..f0cd3c9c0c9), reflowed to one
   statement per line.  GAS, AT&T syntax, x86-64 ELF.  */

        /* Last three instructions of sse2floatfloat; its label and any
           earlier part of it lie before this hunk.  */
        movaps  (%rdi), %xmm0
        movq    %xmm0, (%rsi)
        ret

        .align 2
.globl ffi_closure_UNIX64
        .type   ffi_closure_UNIX64,@function

/*
 * ffi_closure_UNIX64 -- closure entry stub.
 *
 * In:    %rdi, %rsi, %rdx, %rcx, %r8, %r9 and %xmm0-%xmm7 exactly as the
 *        caller left them; %r10 is forwarded unchanged as the first
 *        argument of ffi_closure_UNIX64_inner (presumably the closure
 *        pointer -- confirm against the trampoline that jumps here).
 * Out:   the value the inner routine stored in the result slot, moved to
 *        the register selected by the type code it returned in %eax:
 *          FFI_TYPE_FLOAT / FFI_TYPE_DOUBLE -> %xmm0
 *          FFI_TYPE_LONGDOUBLE              -> %st(0)
 *          FFI_TYPE_STRUCT                  -> nothing loaded
 *          anything else                    -> %rax
 * Clobb: everything ffi_closure_UNIX64_inner clobbers; %rbp is restored.
 *
 * Frame (offsets from %rbp, 240 bytes reserved below it):
 *    -16 .. -128   %xmm7 .. %xmm0, 16 bytes each
 *   -136 .. -176   %r9, %r8, %rcx, %rdx, %rsi, %rdi, 8 bytes each
 *   -180           copy of %edi
 *   -224 .. -201   record passed to the inner routine: two 32-bit fields
 *                  (0 and 48) followed by two pointers (%rbp+16 and
 *                  %rbp-176).  This is laid out like the SysV AMD64
 *                  va_list {gp_offset, fp_offset, overflow_arg_area,
 *                  reg_save_area} -- confirm against the C prototype of
 *                  ffi_closure_UNIX64_inner.
 *   -240           16-byte result slot (equals %rsp at the call)
 *
 * Alignment: given an ABI-conforming caller, %rsp is 16-byte aligned right
 * after the pushq, so %rbp is too; 240 is a multiple of 16, which keeps
 * both the movaps accesses and the call site aligned.
 */
ffi_closure_UNIX64:
.LFB2:
        pushq   %rbp
.LCFI10:
        movq    %rsp, %rbp
.LCFI11:
        subq    $240, %rsp
.LCFI12:
        /* Spill the six integer argument registers, lowest address first. */
        movq    %rdi, -176(%rbp)
        movq    %rsi, -168(%rbp)
        movq    %rdx, -160(%rbp)
        movq    %rcx, -152(%rbp)
        movq    %r8, -144(%rbp)
        movq    %r9, -136(%rbp)
        /* FIXME: We can avoid all this stashing of XMM registers by
           (in ffi_prep_closure) computing the number of
           floating-point args and moving it into %rax before calling
           this function.  Once this is done, uncomment the next few
           lines and only the essential XMM registers will be written
           to memory.  This is a significant saving.  */
/*      movzbl  %al, %eax  */
/*      movq    %rax, %rdx */
/*      leaq    0(,%rdx,4), %rax */
/*      leaq    2f(%rip), %rdx */
/*      subq    %rax, %rdx */
/*      jmp     *%rdx */
        /* Spill %xmm7..%xmm0 directly below the saved frame pointer.  The
           disabled computed jump above steps back 4 bytes per register
           from label 2, so each store must stay a 4-byte encoding.  */
        movaps  %xmm7, -16(%rbp)
        movaps  %xmm6, -32(%rbp)
        movaps  %xmm5, -48(%rbp)
        movaps  %xmm4, -64(%rbp)
        movaps  %xmm3, -80(%rbp)
        movaps  %xmm2, -96(%rbp)
        movaps  %xmm1, -112(%rbp)
        movaps  %xmm0, -128(%rbp)
2:
        /* NOTE(review): nothing in this routine reads -180(%rbp) back;
           this store looks like a leftover -- confirm before removing.  */
        movl    %edi, -180(%rbp)

        /* Fill in the record at -224(%rbp).  */
        movl    $0, -224(%rbp)          /* first 32-bit field  = 0 */
        movl    $48, -220(%rbp)         /* second 32-bit field = 48 */
        leaq    16(%rbp), %rax          /* first slot above the return address */
        movq    %rax, -216(%rbp)
        leaq    -176(%rbp), %rdx        /* base of the register spill area */
        movq    %rdx, -208(%rbp)

        /* ffi_closure_UNIX64_inner (%r10, &record, result slot) */
        leaq    -224(%rbp), %rsi
        movq    %r10, %rdi
        movq    %rsp, %rdx              /* %rsp == %rbp - 240 here */
        call    ffi_closure_UNIX64_inner@PLT

        /* %eax holds the return-type code; pick the matching register.  */
        cmpl    $FFI_TYPE_FLOAT, %eax
        je      1f
        cmpl    $FFI_TYPE_DOUBLE, %eax
        je      1f
        cmpl    $FFI_TYPE_LONGDOUBLE, %eax
        je      3f
        cmpl    $FFI_TYPE_STRUCT, %eax
        je      4f
        /* Everything else is handed back in %rax.  */
        movq    -240(%rbp), %rax
        leave
        ret
1:
        /* float and double are returned in %xmm0.  */
        movaps  -240(%rbp), %xmm0
        leave
        ret
3:
        /* long double is class X87 in the SysV AMD64 ABI and is returned
           on the x87 stack, not in %xmm0.  This assumes the inner routine
           stored it in 80-bit extended format in the result slot.  */
        fldt    -240(%rbp)
        leave
        ret
4:
        /* NOTE(review): no return register is loaded on this path, and
           %rax still holds whatever the inner routine returned.  The SysV
           AMD64 ABI expects %rax to carry the result address for structs
           returned in memory -- confirm callers do not depend on it.  */
        leave
        ret
.LFE2:
        .size   ffi_closure_UNIX64, .-ffi_closure_UNIX64

        .section        .eh_frame,"a",@progbits
.Lframe0:
        .long   .LECIE1-.LSCIE1         # CIE length
.LSCIE1:
        .long   0x0                     # CIE identifier
        .byte   0x1                     # CIE version
        .string "zR"                    # augmentation string
        .uleb128 0x1                    # code alignment factor
        .sleb128 -8                     # data alignment factor
        .byte   0x10                    # return address column
        .uleb128 0x1                    # augmentation data length
        .byte   0x1b                    # FDE pointer encoding: pcrel sdata4
        .byte   0xc                     # DW_CFA_def_cfa
        .uleb128 0x7
        .uleb128 0x8
        .byte   0x90                    # DW_CFA_offset, column 0x10
        .uleb128 0x1
        .align 8
.LECIE1:
.LSFDE1:
        .long   .LEFDE1-.LASFDE1
.LASFDE1:
        .long   .LASFDE1-.Lframe0

        .long   .LFB1-.
        .long   .LFE1-.LFB1
        .uleb128 0x0
        .byte   0x4             # DW_CFA_advance_loc4
        .long   .LCFI0-.LFB1
        .byte   0xe             # DW_CFA_def_cfa_offset
        .uleb128 0x10
        .byte   0x86            # DW_CFA_offset: r6 at cfa-16
        .uleb128 0x2
        .byte   0x4             # DW_CFA_advance_loc4
        .long   .LCFI1-.LCFI0
        .byte   0x86            # DW_CFA_offset: r6 at cfa-16
        .uleb128 0x2
        .byte   0xd             # DW_CFA_def_cfa_reg: r6
        .uleb128 0x6
        .align 8
.LEFDE1:
.LSFDE3:
        .long   .LEFDE3-.LASFDE3        # FDE Length
.LASFDE3:
        .long   .LASFDE3-.Lframe0       # FDE CIE offset

        .long   .LFB2-.                 # FDE initial location
        .long   .LFE2-.LFB2             # FDE address range
        .uleb128 0x0                    # Augmentation size
        .byte   0x4                     # DW_CFA_advance_loc4
        .long   .LCFI10-.LFB2
        .byte   0xe                     # DW_CFA_def_cfa_offset
        .uleb128 0x10
        .byte   0x86                    # DW_CFA_offset, column 0x6
        .uleb128 0x2
        .byte   0x4                     # DW_CFA_advance_loc4
        .long   .LCFI11-.LCFI10
        .byte   0xd                     # DW_CFA_def_cfa_register
        .uleb128 0x6
        .align 8
.LEFDE3:

#endif /* __x86_64__ */