/*
 * Copyright 2002-2019 Intel Corporation.
 *
 * This software is provided to you as Sample Source Code as defined in the accompanying
 * End User License Agreement for the Intel(R) Software Development Products ("Agreement")
 * section 1.L.
 *
 * This software and the related documents are provided as is, with no express or implied
 * warranties, other than those that are expressly stated in the License.
 */

// NOTE(review): the two #include directives below have no header operand in
// the text as received -- presumably the angle-bracketed names were lost in
// extraction. Restore them before building; std::cout / std::endl and NULL
// are used further down.
#include
#include
using std::cout;
using std::endl;

// Not referenced anywhere in the visible portion of this file.
#define N 1024

int main(int argc, char** argv);

// ---------------------------------------------------------------------------
// Fixed-width integer aliases and alignment macros, selected per compiler.
// Any compiler other than GNU-compatible or MSVC is rejected with #error.
// ---------------------------------------------------------------------------
#if defined( __GNUC__)

// NOTE(review): header operand missing here as well; the typedefs below need
// the declarations of uint8_t ... int64_t.
#include
typedef uint8_t UINT8; //LINUX HOSTS
typedef uint16_t UINT16;
typedef uint32_t UINT32;
typedef uint64_t UINT64;
typedef int8_t INT8;
typedef int16_t INT16;
typedef int32_t INT32;
typedef int64_t INT64;

// GNU spelling of the 16-byte / 8-byte alignment requests.
#define ALIGN16 __attribute__ ((aligned(16)))
#define ALIGN8 __attribute__ ((aligned(8)))

#elif defined(_MSC_VER)

typedef unsigned __int8 UINT8 ;
typedef unsigned __int16 UINT16;
typedef unsigned __int32 UINT32;
typedef unsigned __int64 UINT64;
typedef __int8 INT8;
typedef __int16 INT16;
typedef __int32 INT32;
typedef __int64 INT64;

// MSVC spelling of the 16-byte / 8-byte alignment requests.
#define ALIGN16 __declspec(align(16))
#define ALIGN8 __declspec(align(8))

#else
#error Expect usage of either GNU or MS compiler.
#endif

// ---------------------------------------------------------------------------
// XMM register geometry: 16 bytes per register; the word / dword / qword
// counts are each half of the previous one, and the float / double counts
// are derived from sizeof.
// ---------------------------------------------------------------------------
#define MAX_XMM_REGS 16
#define MAX_BYTES_PER_XMM_REG 16
#define MAX_WORDS_PER_XMM_REG (MAX_BYTES_PER_XMM_REG/2)
#define MAX_DWORDS_PER_XMM_REG (MAX_WORDS_PER_XMM_REG/2)
#define MAX_QWORDS_PER_XMM_REG (MAX_DWORDS_PER_XMM_REG/2)
#define MAX_FLOATS_PER_XMM_REG (MAX_BYTES_PER_XMM_REG/sizeof(float))
#define MAX_DOUBLES_PER_XMM_REG (MAX_BYTES_PER_XMM_REG/sizeof(double))

// ---------------------------------------------------------------------------
// MMX register geometry: 8 bytes per register, same derivation scheme.
// ---------------------------------------------------------------------------
#define MAX_MMX_REGS 8
#define MAX_BYTES_PER_MMX_REG 8
#define MAX_WORDS_PER_MMX_REG (MAX_BYTES_PER_MMX_REG/2)
#define MAX_DWORDS_PER_MMX_REG (MAX_WORDS_PER_MMX_REG/2)
#define MAX_QWORDS_PER_MMX_REG (MAX_DWORDS_PER_MMX_REG/2)
#define MAX_FLOATS_PER_MMX_REG (MAX_BYTES_PER_MMX_REG/sizeof(float))
#define MAX_DOUBLES_PER_MMX_REG (MAX_BYTES_PER_MMX_REG/sizeof(double))

// In-memory image of one XMM register, requested to be 16-byte aligned.
// Every member overlays the same storage, giving unsigned, signed and
// floating-point views of the register contents.
union ALIGN16 xmm_reg_t
{
    UINT8 byte[MAX_BYTES_PER_XMM_REG];
    UINT16 word[MAX_WORDS_PER_XMM_REG];
    UINT32 dword[MAX_DWORDS_PER_XMM_REG];
    UINT64 qword[MAX_QWORDS_PER_XMM_REG];

    INT8 s_byte[MAX_BYTES_PER_XMM_REG];
    INT16 s_word[MAX_WORDS_PER_XMM_REG];
    INT32 s_dword[MAX_DWORDS_PER_XMM_REG];
    INT64 s_qword[MAX_QWORDS_PER_XMM_REG];

    float flt[MAX_FLOATS_PER_XMM_REG];
    double dbl[MAX_DOUBLES_PER_XMM_REG];
};

// In-memory image of one MMX register, requested to be 8-byte aligned.
// Same overlay scheme as xmm_reg_t, sized from the MMX macros.
union ALIGN8 mmx_reg_t
{
    UINT8 byte[MAX_BYTES_PER_MMX_REG];
    UINT16 word[MAX_WORDS_PER_MMX_REG];
    UINT32 dword[MAX_DWORDS_PER_MMX_REG];
    UINT64 qword[MAX_QWORDS_PER_MMX_REG];

    INT8 s_byte[MAX_BYTES_PER_MMX_REG];
    INT16 s_word[MAX_WORDS_PER_MMX_REG];
    INT32 s_dword[MAX_DWORDS_PER_MMX_REG];
    INT64 s_qword[MAX_QWORDS_PER_MMX_REG];

    float flt[MAX_FLOATS_PER_MMX_REG];
    double dbl[MAX_DOUBLES_PER_MMX_REG];
};

// Load the contents of xmm_reg into XMM0.
// GNU build: a single movdqu from the memory operand; xmm0 is listed as
// clobbered. MSVC build: only declared here with C linkage; the definition is
// not in the visible source (presumably a separate assembly module -- confirm).
#if defined(__GNUC__)
static void set_xmm_reg0(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm0" : : "m" (xmm_reg) : "%xmm0" );
}
#elif defined(_MSC_VER)
extern "C" void set_xmm_reg0(xmm_reg_t& xmm_reg);
#endif

// Store the current contents of XMM0 into xmm_reg.
// GNU build: a single movdqu to the memory output operand. MSVC build: only
// declared here; defined outside the visible source.
#if defined(__GNUC__)
static void get_xmm_reg0(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm0,%0" : "=m" (xmm_reg) );
}
#elif defined(_MSC_VER)
extern "C" void get_xmm_reg0(xmm_reg_t& xmm_reg);
#endif

// Load the contents of mmx_reg into MM0.
// GNU build: a single movq from the memory operand; mm0 is listed as
// clobbered. MSVC build: only declared here; defined outside the visible
// source.
#if defined(__GNUC__)
static void set_mmx_reg0(mmx_reg_t& mmx_reg)
{
    asm volatile("movq %0, %%mm0" : : "m" (mmx_reg) : "%mm0" );
}
#elif defined(_MSC_VER)
extern "C" void set_mmx_reg0(mmx_reg_t& mmx_reg);
#endif

// Store the current contents of MM0 into mmx_reg.
// GNU build: a single movq to the memory output operand. MSVC build: only
// declared here; defined outside the visible source.
#if defined(__GNUC__)
static void get_mmx_reg0(mmx_reg_t& mmx_reg)
{
    asm volatile("movq %%mm0,%0" : "=m" (mmx_reg) );
}
#elif defined(_MSC_VER)
extern "C" void get_mmx_reg0(mmx_reg_t& mmx_reg);
#endif

// Round-trip z through XMM0 and return the low dword read back.
// Only dword[0] of the local image is written before the load; the other
// twelve bytes are left uninitialized. Per the comment on the set call, an
// external tool is expected to alter what the load puts in the register, so
// the returned value need not equal z.
UINT32 init_sse(UINT32 z)
{
    xmm_reg_t xmm;
    xmm.dword[0] = z;
    set_xmm_reg0(xmm); // from memory to register -- we modify the output using the tool
    get_xmm_reg0(xmm); // from register to memory
    return xmm.dword[0];
}

// Round-trip z through MM0 and return the low dword read back.
// Only dword[0] of the local image is written before the load; the upper four
// bytes are left uninitialized. As with init_sse, the existing comment states
// that the result of the load is modified externally.
UINT32 init_mmx(UINT32 z)
{
    mmx_reg_t mmx;
    mmx.dword[0] = z;
    set_mmx_reg0(mmx);// from mem to register -- we modify the output of this one
    get_mmx_reg0(mmx); // from register to memory
    return mmx.dword[0];
}

/* This part replaces the library version of atoi which causes problems in Intel(R) 64 on windows */

// True when c is one of the ASCII decimal digits '0'..'9'.
bool isdigit(char c)
{
    return ((c >= '0') && (c <='9'));
}

// Numeric value of a digit character: its offset from '0'. No range check is
// performed, so the caller must pass a digit.
int digitconv(char c)
{
    return (c - '0');
}

// True for space, horizontal tab and newline only; no other characters are
// treated as whitespace.
bool iswhitespace(char c)
{
    return ((c == ' ') || (c == '\t') || (c == '\n'));
}

// String-to-integer conversion used in place of atoi (see the comment above).
// Visible behaviour: returns 0 for a NULL pointer, skips leading whitespace as
// defined by iswhitespace, then consumes one optional '-' or '+' sign.
// NOTE(review): this definition runs past the end of the text as received; it
// is reproduced verbatim up to the cut and nothing is asserted about the
// remainder (including how `base` and `maxdigits` are used).
int str2int(char * str, int base = 10)
{
    const int maxdigits = 9;
    if (NULL==str )
    {
        return 0;
    }
    int it=0;
    //Preceding whitespaces
    while ( iswhitespace(str[it]) ) ++it;
    //Negative number?
    bool neg=false;
    if (str[it] == '-')
    {
        neg=true;
        ++it;
    }
    else if (str[it] == '+')
    {
        neg = false;
        ++it;
    }
    int num=0;
    for (int i=0; (i