You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

212 lines
5.2 KiB

/*
* Copyright 2002-2019 Intel Corporation.
*
* This software is provided to you as Sample Source Code as defined in the accompanying
* End User License Agreement for the Intel(R) Software Development Products ("Agreement")
* section 1.L.
*
* This software and the related documents are provided as is, with no express or implied
* warranties, other than those that are expressly stated in the License.
*/
#include <climits>
#include <iomanip>
#include <iostream>
using std::cout;
using std::endl;
// NOTE(review): N is not referenced anywhere in the visible part of this file -- confirm before removing.
#define N 1024
// Forward declaration of the program entry point; the definition appears further below.
int main(int argc, char** argv);
/*
 * Fixed-width integer aliases (UINT8..UINT64, INT8..INT64) and the ALIGN16 /
 * ALIGN8 alignment attributes, selected per compiler.
 * GNU-compatible compilers use the <stdint.h> types and __attribute__((aligned));
 * MSVC uses its sized __intN types and __declspec(align).
 * Any other compiler is rejected at preprocessing time.
 */
#if defined( __GNUC__)
#include <stdint.h>
typedef uint8_t UINT8; //LINUX HOSTS
typedef uint16_t UINT16;
typedef uint32_t UINT32;
typedef uint64_t UINT64;
typedef int8_t INT8;
typedef int16_t INT16;
typedef int32_t INT32;
typedef int64_t INT64;
// Request 16-byte / 8-byte alignment for the type the attribute is applied to.
#define ALIGN16 __attribute__ ((aligned(16)))
#define ALIGN8 __attribute__ ((aligned(8)))
#elif defined(_MSC_VER)
typedef unsigned __int8 UINT8 ;
typedef unsigned __int16 UINT16;
typedef unsigned __int32 UINT32;
typedef unsigned __int64 UINT64;
typedef __int8 INT8;
typedef __int16 INT16;
typedef __int32 INT32;
typedef __int64 INT64;
// Request 16-byte / 8-byte alignment for the type the attribute is applied to.
#define ALIGN16 __declspec(align(16))
#define ALIGN8 __declspec(align(8))
#else
#error Expect usage of either GNU or MS compiler.
#endif
/*
 * Register geometry constants.
 * The *_PER_XMM_REG / *_PER_MMX_REG macros give the number of elements of each
 * width that fit in one 16-byte XMM or one 8-byte MMX register image; they are
 * used as the array extents of xmm_reg_t and mmx_reg_t.
 * The FLOATS and DOUBLES counts are derived from sizeof and therefore have
 * type size_t rather than int.
 * NOTE(review): MAX_XMM_REGS and MAX_MMX_REGS are not referenced in the visible
 * part of this file.
 */
#define MAX_XMM_REGS 16
#define MAX_BYTES_PER_XMM_REG 16
#define MAX_WORDS_PER_XMM_REG (MAX_BYTES_PER_XMM_REG/2)
#define MAX_DWORDS_PER_XMM_REG (MAX_WORDS_PER_XMM_REG/2)
#define MAX_QWORDS_PER_XMM_REG (MAX_DWORDS_PER_XMM_REG/2)
#define MAX_FLOATS_PER_XMM_REG (MAX_BYTES_PER_XMM_REG/sizeof(float))
#define MAX_DOUBLES_PER_XMM_REG (MAX_BYTES_PER_XMM_REG/sizeof(double))
#define MAX_MMX_REGS 8
#define MAX_BYTES_PER_MMX_REG 8
#define MAX_WORDS_PER_MMX_REG (MAX_BYTES_PER_MMX_REG/2)
#define MAX_DWORDS_PER_MMX_REG (MAX_WORDS_PER_MMX_REG/2)
#define MAX_QWORDS_PER_MMX_REG (MAX_DWORDS_PER_MMX_REG/2)
#define MAX_FLOATS_PER_MMX_REG (MAX_BYTES_PER_MMX_REG/sizeof(float))
#define MAX_DOUBLES_PER_MMX_REG (MAX_BYTES_PER_MMX_REG/sizeof(double))
/*
 * In-memory image of one XMM register, declared with 16-byte alignment.
 * All members overlay the same storage, giving views of the register contents
 * as unsigned or signed integer lanes of each width, or as float / double lanes.
 */
union ALIGN16 xmm_reg_t
{
// Unsigned integer views.
UINT8 byte[MAX_BYTES_PER_XMM_REG];
UINT16 word[MAX_WORDS_PER_XMM_REG];
UINT32 dword[MAX_DWORDS_PER_XMM_REG];
UINT64 qword[MAX_QWORDS_PER_XMM_REG];
// Signed integer views.
INT8 s_byte[MAX_BYTES_PER_XMM_REG];
INT16 s_word[MAX_WORDS_PER_XMM_REG];
INT32 s_dword[MAX_DWORDS_PER_XMM_REG];
INT64 s_qword[MAX_QWORDS_PER_XMM_REG];
// Floating-point views.
float flt[MAX_FLOATS_PER_XMM_REG];
double dbl[MAX_DOUBLES_PER_XMM_REG];
};
/*
 * In-memory image of one MMX register, declared with 8-byte alignment.
 * All members overlay the same storage, giving views of the register contents
 * as unsigned or signed integer lanes of each width, or as float / double lanes.
 */
union ALIGN8 mmx_reg_t
{
// Unsigned integer views.
UINT8 byte[MAX_BYTES_PER_MMX_REG];
UINT16 word[MAX_WORDS_PER_MMX_REG];
UINT32 dword[MAX_DWORDS_PER_MMX_REG];
UINT64 qword[MAX_QWORDS_PER_MMX_REG];
// Signed integer views.
INT8 s_byte[MAX_BYTES_PER_MMX_REG];
INT16 s_word[MAX_WORDS_PER_MMX_REG];
INT32 s_dword[MAX_DWORDS_PER_MMX_REG];
INT64 s_qword[MAX_QWORDS_PER_MMX_REG];
// Floating-point views.
float flt[MAX_FLOATS_PER_MMX_REG];
double dbl[MAX_DOUBLES_PER_MMX_REG];
};
/*
 * set_xmm_reg0: loads the contents of xmm_reg into register XMM0.
 * GNU-compatible compilers: implemented here with inline assembly.
 * MSVC: only declared here, with C linkage; the definition is not in the
 * visible part of this file (presumably a separate assembly source -- TODO confirm).
 */
#if defined(__GNUC__)
static void set_xmm_reg0(xmm_reg_t& xmm_reg)
{
// movdqu with xmm_reg as the memory source operand; %xmm0 is listed as
// clobbered so the compiler knows the register is overwritten.
asm volatile("movdqu %0, %%xmm0" : : "m" (xmm_reg) : "%xmm0" );
}
#elif defined(_MSC_VER)
extern "C" void set_xmm_reg0(xmm_reg_t& xmm_reg);
#endif
/*
 * get_xmm_reg0: stores the current contents of register XMM0 into xmm_reg.
 * GNU-compatible compilers: implemented here with inline assembly.
 * MSVC: only declared here, with C linkage; the definition is not in the
 * visible part of this file (presumably a separate assembly source -- TODO confirm).
 */
#if defined(__GNUC__)
static void get_xmm_reg0(xmm_reg_t& xmm_reg)
{
// movdqu with xmm_reg as the memory destination ("=m" output operand).
asm volatile("movdqu %%xmm0,%0" : "=m" (xmm_reg) );
}
#elif defined(_MSC_VER)
extern "C" void get_xmm_reg0(xmm_reg_t& xmm_reg);
#endif
/*
 * set_mmx_reg0: loads the contents of mmx_reg into register MM0.
 * GNU-compatible compilers: implemented here with inline assembly.
 * MSVC: only declared here, with C linkage; the definition is not in the
 * visible part of this file (presumably a separate assembly source -- TODO confirm).
 */
#if defined(__GNUC__)
static void set_mmx_reg0(mmx_reg_t& mmx_reg)
{
// movq with mmx_reg as the memory source operand; %mm0 is listed as
// clobbered so the compiler knows the register is overwritten.
asm volatile("movq %0, %%mm0" : : "m" (mmx_reg) : "%mm0" );
}
#elif defined(_MSC_VER)
extern "C" void set_mmx_reg0(mmx_reg_t& mmx_reg);
#endif
/*
 * get_mmx_reg0: stores the current contents of register MM0 into mmx_reg.
 * GNU-compatible compilers: implemented here with inline assembly.
 * MSVC: only declared here, with C linkage; the definition is not in the
 * visible part of this file (presumably a separate assembly source -- TODO confirm).
 */
#if defined(__GNUC__)
static void get_mmx_reg0(mmx_reg_t& mmx_reg)
{
// movq with mmx_reg as the memory destination ("=m" output operand).
asm volatile("movq %%mm0,%0" : "=m" (mmx_reg) );
}
#elif defined(_MSC_VER)
extern "C" void get_mmx_reg0(mmx_reg_t& mmx_reg);
#endif
/*
 * Round-trips z through register XMM0.
 *
 * z is written to the lowest 32-bit lane of a scratch xmm_reg_t, the scratch
 * value is loaded into XMM0, XMM0 is stored back over the scratch value, and
 * the lowest 32-bit lane is returned.
 *
 * The scratch value is zero-initialized first so that all 16 bytes loaded into
 * XMM0 are defined, rather than 12 of them being leftover stack contents.
 *
 * Per the original note, an external tool is expected to modify the register
 * contents between the two transfers, so the returned value may differ from z
 * when the program runs under that tool.
 */
UINT32 init_sse(UINT32 z)
{
    xmm_reg_t xmm = {};  // empty braces zero the first member, which spans the whole union
    xmm.dword[0] = z;
    set_xmm_reg0(xmm); // from memory to register -- the tool modifies the value produced here
    get_xmm_reg0(xmm); // from register back to memory
    return xmm.dword[0];
}
/*
 * Round-trips z through register MM0.
 *
 * z is written to the lowest 32-bit lane of a scratch mmx_reg_t, the scratch
 * value is loaded into MM0, MM0 is stored back over the scratch value, and the
 * lowest 32-bit lane is returned.
 *
 * The scratch value is zero-initialized first so that all 8 bytes loaded into
 * MM0 are defined, rather than 4 of them being leftover stack contents.
 *
 * Per the original note, an external tool is expected to modify the register
 * contents between the two transfers, so the returned value may differ from z
 * when the program runs under that tool.
 *
 * NOTE(review): no EMMS is issued after touching MM0; this looks harmless for
 * the visible callers, which only print integers afterwards -- confirm if
 * floating-point code is ever added after this call.
 */
UINT32 init_mmx(UINT32 z)
{
    mmx_reg_t mmx = {};  // empty braces zero the first member, which spans the whole union
    mmx.dword[0] = z;
    set_mmx_reg0(mmx); // from memory to register -- the tool modifies the value produced here
    get_mmx_reg0(mmx); // from register back to memory
    return mmx.dword[0];
}
/* This part replaces the library version of atoi, which causes problems on Intel(R) 64 on Windows. */
/* Returns true when c is one of the decimal digit characters '0' through '9'. */
bool isdigit(char c)
{
    if (c < '0')
    {
        return false;
    }
    return c <= '9';
}
/*
 * Returns the distance of c from the character '0'. For a decimal digit
 * character this is its numeric value; no range check is performed.
 */
int digitconv(char c)
{
    const int zero_code = '0';
    const int char_code = c;
    return char_code - zero_code;
}
/* Returns true when c is a space, a horizontal tab, or a newline; false for every other character. */
bool iswhitespace(char c)
{
    switch (c)
    {
        case ' ':
        case '\t':
        case '\n':
            return true;
        default:
            return false;
    }
}
/*
 * Minimal atoi-style parser: converts the integer at the start of str to an int.
 *
 * Parameters:
 *   str  - null-terminated text to parse; a null pointer yields 0.
 *   base - radix used to weight the digits (default 10). Only the characters
 *          '0'..'9' are recognized as digits, so bases above 10 cannot express
 *          digit values of 10 or more.
 *
 * Parsing skips leading spaces, tabs and newlines, accepts one optional '+' or
 * '-' sign, and then consumes at most 9 digits. It stops early at the first
 * character that is not a decimal digit, at a digit that is not valid in the
 * requested base (digit >= base), or at a digit that would push the result
 * past INT_MAX. Whatever has been accumulated up to that point is returned,
 * negated when a '-' sign was seen; characters after that point are ignored.
 */
int str2int(char * str, int base = 10)
{
    // Upper bound on the number of digits consumed; keeps base-10 input within int range.
    const int maxdigits = 9;
    if (NULL == str)
    {
        return 0;
    }

    int it = 0;
    // Leading whitespace.
    while (iswhitespace(str[it]))
    {
        ++it;
    }

    // Optional sign.
    bool neg = false;
    if (str[it] == '-')
    {
        neg = true;
        ++it;
    }
    else if (str[it] == '+')
    {
        ++it;
    }

    int num = 0;
    // The terminating null character is not a digit, so the loop also ends at the end of the string.
    for (int i = 0; (i < maxdigits) && isdigit(str[it]); ++i, ++it)
    {
        const int digit = digitconv(str[it]);
        if (digit >= base)
        {
            break; // not a valid digit in this base (also rejects every digit when base <= 0)
        }
        // digit >= 0 and digit < base imply base >= 1, so the division below is safe.
        if (num > (INT_MAX - digit) / base)
        {
            break; // appending this digit would overflow int
        }
        num = num * base + digit;
    }
    return neg ? -num : num;
}
/* End of atoi replacement */
int main(int argc, char** argv)
{
UINT32 x = init_sse(str2int(argv[1]));
cout << x << endl;
UINT32 y = init_mmx(x);
cout << y << endl;
return 0;
}