You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

118 lines
2.7 KiB

/*
* Copyright 2002-2019 Intel Corporation.
*
* This software is provided to you as Sample Source Code as defined in the accompanying
* End User License Agreement for the Intel(R) Software Development Products ("Agreement")
* section 1.L.
*
* This software and the related documents are provided as is, with no express or implied
* warranties, other than those that are expressly stated in the License.
*/
/*
 * void CopyWithXmm(char *dest, const char *src, size_t size)
 *
 * Copy a memory block using XMM registers. Assumes that 'dest' and
 * 'src' are 16-byte aligned (the copy uses movdqa) and 'size' is a
 * multiple of 16 bytes. The low four bits of 'size' are ignored.
 *
 * Syntax:   AT&T (GAS), run through the C preprocessor.
 * In:       %rdi = dest, %rsi = src, %rdx = size in bytes
 *           (System V AMD64 argument order)
 * Out:      nothing
 * Clobbers: %rdi, %rsi, %rdx, %xmm0, %xmm1, flags
 *
 * The main loop moves 32 bytes per iteration. A size of zero returns
 * without touching memory, and a size that is an odd multiple of 16
 * has its extra 16-byte chunk copied before the loop starts.
 */
        .text
        .align 4
#if defined(TARGET_MAC)
        .globl _CopyWithXmm
_CopyWithXmm:
#else
        .globl CopyWithXmm
CopyWithXmm:
#endif
        /*
         * rdx = number of 32-byte pairs. The last bit shifted out (bit 4
         * of 'size') lands in CF, so CF=1 means there is an odd chunk.
         */
        shr     $0x5, %rdx
        jnc     .LCopyWithXmm_pairs

        /* Odd 16-byte chunk: copy it first and step both pointers. */
        movdqa  (%rsi), %xmm0
        movdqa  %xmm0, (%rdi)
        lea     0x10(%rsi), %rsi
        lea     0x10(%rdi), %rdi

.LCopyWithXmm_pairs:
        /* Never enter the loop with a zero count: dec would wrap around. */
        test    %rdx, %rdx
        je      .LCopyWithXmm_done

.LCopyWithXmm_loop:
        /* Invariant: rdx = 32-byte pairs remaining, always > 0 here. */
        movdqa  (%rsi), %xmm0
        movdqa  0x10(%rsi), %xmm1
        movdqa  %xmm0, (%rdi)
        movdqa  %xmm1, 0x10(%rdi)
        lea     0x20(%rsi), %rsi
        lea     0x20(%rdi), %rdi
        dec     %rdx
        jne     .LCopyWithXmm_loop

.LCopyWithXmm_done:
        ret
/*
 * void CopyWithXmmDelay(char *dest, const char *src, size_t size)
 *
 * This is the same copy as CopyWithXmm() except that it has an
 * internal delay that makes it more likely for a signal to interrupt
 * the copy during a critical point, i.e. after the XMM registers have
 * been loaded and before they have been stored.
 *
 * Assumes that 'dest' and 'src' are 16-byte aligned (the copy uses
 * movdqa) and 'size' is a multiple of 16 bytes. The low four bits of
 * 'size' are ignored.
 *
 * Syntax:   AT&T (GAS), run through the C preprocessor.
 * In:       %rdi = dest, %rsi = src, %rdx = size in bytes
 *           (System V AMD64 argument order)
 * Out:      nothing
 * Clobbers: %rax, %rdi, %rsi, %rdx, %xmm0, %xmm1, flags
 *
 * The main loop moves 32 bytes per iteration. A size of zero returns
 * without touching memory, and a size that is an odd multiple of 16
 * has its extra 16-byte chunk copied (with its own delay) before the
 * loop starts.
 */
        .text
        .align 4
#if defined(TARGET_MAC)
        .globl _CopyWithXmmDelay
_CopyWithXmmDelay:
#else
        .globl CopyWithXmmDelay
CopyWithXmmDelay:
#endif
        /*
         * rdx = number of 32-byte pairs. The last bit shifted out (bit 4
         * of 'size') lands in CF, so CF=1 means there is an odd chunk.
         */
        shr     $0x5, %rdx
        jnc     .LCopyWithXmmDelay_pairs

        /*
         * Odd 16-byte chunk: copy it first, with the same delay window
         * between the load and the store as the main loop uses.
         */
        movdqa  (%rsi), %xmm0
        mov     $0x1000, %rax
.LCopyWithXmmDelay_lead_spin:
        dec     %rax
        jne     .LCopyWithXmmDelay_lead_spin
        movdqa  %xmm0, (%rdi)
        lea     0x10(%rsi), %rsi
        lea     0x10(%rdi), %rdi

.LCopyWithXmmDelay_pairs:
        /* Never enter the loop with a zero count: dec would wrap around. */
        test    %rdx, %rdx
        je      .LCopyWithXmmDelay_done

.LCopyWithXmmDelay_loop:
        /* Invariant: rdx = 32-byte pairs remaining, always > 0 here. */
        movdqa  (%rsi), %xmm0
        movdqa  0x10(%rsi), %xmm1

        /*
         * Delay for a while. The values in the XMM registers are live
         * at this point, so this delay loop opens up a timing window that
         * makes it more likely for a signal to interrupt the copy when
         * the XMM registers are being used.
         */
        mov     $0x1000, %rax
.LCopyWithXmmDelay_spin:
        dec     %rax
        jne     .LCopyWithXmmDelay_spin

        movdqa  %xmm0, (%rdi)
        movdqa  %xmm1, 0x10(%rdi)
        lea     0x20(%rsi), %rsi
        lea     0x20(%rdi), %rdi
        dec     %rdx
        jne     .LCopyWithXmmDelay_loop

.LCopyWithXmmDelay_done:
        ret
/*
 * void CopyWithXmmFault(char *dest, const char *src, size_t size)
 *
 * This is the same copy as CopyWithXmm() except that there is an
 * illegal memory access to address 0x0 at a critical point, i.e.
 * after the XMM registers have been loaded and before they have been
 * stored. The caller is expected to set up a SIGSEGV handler to catch
 * the illegal access. The handler must change the value of %rax to
 * a valid memory location.
 *
 * Assumes that 'dest' and 'src' are 16-byte aligned (the copy uses
 * movdqa) and 'size' is a multiple of 16 bytes. The low four bits of
 * 'size' are ignored.
 *
 * Syntax:   AT&T (GAS), run through the C preprocessor.
 * In:       %rdi = dest, %rsi = src, %rdx = size in bytes
 *           (System V AMD64 argument order)
 * Out:      nothing
 * Clobbers: %rax, %rdi, %rsi, %rdx, %xmm0, %xmm1, flags
 *
 * The main loop moves 32 bytes per iteration and performs the illegal
 * access once per iteration. A size that is an odd multiple of 16 has
 * its extra 16-byte chunk copied (with its own illegal access) before
 * the loop starts. A size of zero returns without touching memory and
 * therefore without any illegal access.
 */
        .text
        .align 4
#if defined(TARGET_MAC)
        .globl _CopyWithXmmFault
_CopyWithXmmFault:
#else
        .globl CopyWithXmmFault
CopyWithXmmFault:
#endif
        /*
         * rdx = number of 32-byte pairs. The last bit shifted out (bit 4
         * of 'size') lands in CF, so CF=1 means there is an odd chunk.
         */
        shr     $0x5, %rdx
        jnc     .LCopyWithXmmFault_pairs

        /*
         * Odd 16-byte chunk: copy it first, with the same illegal access
         * between the load and the store as the main loop uses.
         */
        movdqa  (%rsi), %xmm0
        mov     $0x0, %rax
        mov     (%rax), %rax
        movdqa  %xmm0, (%rdi)
        lea     0x10(%rsi), %rsi
        lea     0x10(%rdi), %rdi

.LCopyWithXmmFault_pairs:
        /* Never enter the loop with a zero count: dec would wrap around. */
        test    %rdx, %rdx
        je      .LCopyWithXmmFault_done

.LCopyWithXmmFault_loop:
        /* Invariant: rdx = 32-byte pairs remaining, always > 0 here. */
        movdqa  (%rsi), %xmm0
        movdqa  0x10(%rsi), %xmm1

        /*
         * This causes a SIGSEGV. The caller must fix %rax in a handler.
         */
        mov     $0x0, %rax
        mov     (%rax), %rax

        movdqa  %xmm0, (%rdi)
        movdqa  %xmm1, 0x10(%rdi)
        lea     0x20(%rsi), %rsi
        lea     0x20(%rdi), %rdi
        dec     %rdx
        jne     .LCopyWithXmmFault_loop

.LCopyWithXmmFault_done:
        ret