You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
395 lines
14 KiB
395 lines
14 KiB
/*
|
|
* Copyright 2002-2019 Intel Corporation.
|
|
*
|
|
* This software is provided to you as Sample Source Code as defined in the accompanying
|
|
* End User License Agreement for the Intel(R) Software Development Products ("Agreement")
|
|
* section 1.L.
|
|
*
|
|
* This software and the related documents are provided as is, with no express or implied
|
|
* warranties, other than those that are expressly stated in the License.
|
|
*/
|
|
|
|
// sse-unaligned-class2.cpp
|
|
|
|
// This tool provides a class that forces every SSE operation that
|
|
// reads or writes memory to work from an aligned buffer. There is one
|
|
// buffer for loads and one for stores, per thread.
|
|
|
|
// This tries to align more types of references than the
|
|
// sse-unaligned-class.cpp tool does.
|
|
|
|
// FIXME: 2007-05-09 This realigns ALL SSE operations that are not emulated
|
|
// already without checking for alignment. I should make this check for
|
|
// misalignment.
|
|
|
|
#include <cassert>
|
|
#include <cstdio>
|
|
#include <iostream>
|
|
#include <iomanip>
|
|
#include <fstream>
|
|
#include <cstring>
|
|
#include <unistd.h>
|
|
|
|
#if defined(__GNUC__) && !defined(_WIN32)
|
|
# include <cstdint>
|
|
#endif
|
|
|
|
#include "pin.H"
|
|
extern "C" {
|
|
#include "xed-interface.h"
|
|
using std::cerr;
|
|
using std::string;
|
|
using std::endl;
|
|
}
|
|
|
|
|
|
// Integer type names used by this tool.
//
// Non-Windows builds only need uint_t here; the fixed-width names are
// expected to come from <cstdint>, which this file includes for GCC on
// non-Windows targets.  Windows builds get every name as a macro that
// expands to the compiler's sized __intN types.
#if !defined(_WIN32)
typedef unsigned int uint_t;
#else
# define uint8_t  unsigned __int8
# define uint16_t unsigned __int16
# define uint32_t unsigned __int32
# define uint64_t unsigned __int64
# define int8_t  __int8
# define int16_t __int16
# define int32_t __int32
# define int64_t __int64
# define uint_t  unsigned int
#endif
|
|
|
|
// SSE_ALIGN: declaration attribute requesting 16-byte alignment, spelled
// with __declspec for non-GNU compilers and __attribute__ for GNU ones.
#if !defined(__GNUC__)
# define SSE_ALIGN __declspec(align(16))
#else
// NOTE: macOS* XCODE2.4.1 gcc and Cygwin gcc 3.4.x only allow for 16b
// alignment!
# define SSE_ALIGN __attribute__ ((aligned(16)))
#endif
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// A 16-byte scratch buffer.  thread_data_t holds one of these for loads and
// one for stores, and rewrite_instruction() refuses to rewrite any memory
// operand longer than sizeof(sse_aligned_buffer_t).
typedef uint8_t sse_aligned_buffer_t[16];
|
|
|
|
|
|
|
|
|
|
class thread_data_t
|
|
{
|
|
public:
|
|
thread_data_t() {}
|
|
sse_aligned_buffer_t SSE_ALIGN read;
|
|
sse_aligned_buffer_t SSE_ALIGN write;
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
class sse_aligner_t
|
|
{
|
|
public:
|
|
sse_aligner_t()
|
|
: knob_output_file(KNOB_MODE_WRITEONCE,"pintool", "o",
|
|
"sse-unaligned.out", "specify profile file name"),
|
|
knob_pid(KNOB_MODE_WRITEONCE, "pintool", "i",
|
|
"0", "append pid to output"),
|
|
knob_loads(KNOB_MODE_WRITEONCE, "pintool", "align-sse-loads",
|
|
"1", "align unaligned SSE loads"),
|
|
knob_stores(KNOB_MODE_WRITEONCE, "pintool", "align-sse-stores",
|
|
"1", "align unaligned SSE stores"),
|
|
knob_verbose(KNOB_MODE_WRITEONCE, "pintool", "align-sse-verbose",
|
|
"0", "make the sse aligner verbose")
|
|
{
|
|
num_threads = 1;
|
|
active_threads = 1;
|
|
out = 0;
|
|
//NOTE: knob processing must happen in the activate() function.
|
|
}
|
|
|
|
~sse_aligner_t()
|
|
{
|
|
if (out)
|
|
out->close();
|
|
}
|
|
|
|
std::ofstream* out;
|
|
KNOB<string> knob_output_file;
|
|
KNOB<BOOL> knob_pid;
|
|
KNOB<BOOL> knob_loads;
|
|
KNOB<BOOL> knob_stores;
|
|
KNOB<BOOL> knob_verbose;
|
|
|
|
// key for accessing TLS storage in the threads. initialized once in main()
|
|
TLS_KEY tls_key;
|
|
|
|
uint32_t num_threads;
|
|
uint32_t active_threads;
|
|
bool realign_stores;
|
|
bool realign_loads;
|
|
bool verbose;
|
|
|
|
void activate()
|
|
{
|
|
//FIXME: only one aligner at a time -- not changing output file
|
|
//name based on a static count of the number of sse_aligner_t
|
|
//objects.
|
|
|
|
if (knob_verbose)
|
|
{
|
|
string filename = knob_output_file.Value();
|
|
if (knob_pid)
|
|
{
|
|
filename += "." + decstr(getpid());
|
|
}
|
|
out = new std::ofstream(filename.c_str());
|
|
}
|
|
realign_stores = (knob_stores==1);
|
|
realign_loads = (knob_loads==1);
|
|
verbose = (knob_verbose==1);
|
|
|
|
// obtain a key for TLS storage
|
|
tls_key = PIN_CreateThreadDataKey(0);
|
|
thread_data_t* tdata = new thread_data_t;
|
|
// remember the first thread's data for later
|
|
PIN_SetThreadData(tls_key, tdata, PIN_ThreadId());
|
|
|
|
PIN_AddThreadStartFunction(reinterpret_cast<THREAD_START_CALLBACK>(thread_begin),
|
|
this);
|
|
PIN_AddThreadFiniFunction(reinterpret_cast<THREAD_FINI_CALLBACK>(thread_end),
|
|
this);
|
|
|
|
TRACE_AddInstrumentFunction(reinterpret_cast<TRACE_INSTRUMENT_CALLBACK>(instrument_trace),
|
|
this);
|
|
if (verbose)
|
|
*out << "sse aligner activated" << endl;
|
|
}
|
|
|
|
|
|
|
|
thread_data_t* get_tls(THREADID tid)
|
|
{
|
|
thread_data_t* tdata =
|
|
static_cast<thread_data_t*>(PIN_GetThreadData(tls_key, tid));
|
|
return tdata;
|
|
}
|
|
|
|
|
|
static void thread_begin(THREADID tid, CONTEXT *ctxt, INT32 flags, sse_aligner_t *pthis)
|
|
{
|
|
if (pthis->verbose)
|
|
*(pthis->out) << "thead begin " << static_cast<uint32_t>(tid) << endl;
|
|
// This function is locked no need for a Pin Lock here
|
|
pthis->num_threads++;
|
|
pthis->active_threads++;
|
|
|
|
thread_data_t* tdata = new thread_data_t;
|
|
// remember my pointer for later
|
|
PIN_SetThreadData(pthis->tls_key, tdata, tid);
|
|
}
|
|
|
|
static void thread_end(THREADID tid, const CONTEXT *ctxt, INT32 code, sse_aligner_t *pthis)
|
|
{
|
|
thread_data_t* tdata = pthis->get_tls(tid);
|
|
delete tdata;
|
|
|
|
// This function is locked no need for a Pin Lock here
|
|
pthis->active_threads--;
|
|
}
|
|
|
|
|
|
|
|
static void rewrite_instruction(INS ins, bool is_read, sse_aligner_t* pthis)
|
|
{
|
|
//fprintf(stderr,"Rewriting %p\n",(void*)INS_Address(ins));
|
|
|
|
// Avoid aligning trivially aligned stuff
|
|
const xed_decoded_inst_t* xedd = INS_XedDec(ins);
|
|
if (xed_decoded_inst_get_memory_operand_length(xedd,0)
|
|
> sizeof (sse_aligned_buffer_t))
|
|
{
|
|
return;
|
|
}
|
|
xed_reg_enum_t breg1 = xed_decoded_inst_get_base_reg(xedd,0);
|
|
xed_reg_enum_t ireg = xed_decoded_inst_get_index_reg(xedd,0);
|
|
INT64 disp = 0;
|
|
if (xed_decoded_inst_get_memory_displacement_width(xedd,0))
|
|
disp = xed_decoded_inst_get_memory_displacement(xedd, 0);
|
|
if (breg1 == XED_REG_INVALID && ireg == XED_REG_INVALID) {
|
|
// displacement only... check its alignment
|
|
if ((disp & 0xF) == 0)
|
|
return;
|
|
}
|
|
else if (breg1== XED_REG_RIP) {
|
|
// rip-relative -- check alignment
|
|
ADDRINT ip = INS_Address(ins);
|
|
ADDRINT ea = ip + disp;
|
|
if ((ea & 0xF) == 0)
|
|
return;
|
|
}
|
|
if (pthis->verbose)
|
|
*(pthis->out) << "REWRITE "
|
|
<< string(is_read ? "LOAD :" : "STORE:")
|
|
<< std::setw(16)
|
|
<< std::hex
|
|
<< INS_Address(ins)
|
|
<< std::dec
|
|
<< " "
|
|
<< INS_Disassemble(ins) << std::endl;
|
|
|
|
if (is_read)
|
|
{
|
|
// Loads -- we change the load to use G0 as the base register and
|
|
// then add a "before" function that sets G0 and copies the data to
|
|
// an aligned bufffer.
|
|
INS_InsertCall(ins, IPOINT_BEFORE,
|
|
AFUNPTR(copy_to_aligned_load_buffer_and_return_pointer),
|
|
IARG_MEMORYREAD_EA,
|
|
IARG_MEMORYREAD_SIZE,
|
|
IARG_INST_PTR,
|
|
IARG_THREAD_ID,
|
|
IARG_PTR, pthis,
|
|
IARG_RETURN_REGS, REG_INST_G0,
|
|
IARG_END);
|
|
}
|
|
else
|
|
{
|
|
// Stores -- we change the store to use G0 as a base register and
|
|
// then add a "before" function to set G0 and an "after" function
|
|
// that copies the data from the aligned buffer to where it was
|
|
// supposed to go.
|
|
// Since we can't ask for the MEMORYWRITE_EA at IPOINT_AFTER, we save
|
|
// that in REG_INST_G1 at IPOINT_BEFORE and then use it at IPOINT_AFTER.
|
|
INS_InsertCall(ins, IPOINT_BEFORE,
|
|
AFUNPTR(return_pointer_to_aligned_store_buffer),
|
|
IARG_MEMORYWRITE_EA,
|
|
IARG_INST_PTR,
|
|
IARG_THREAD_ID,
|
|
IARG_PTR, pthis,
|
|
IARG_REG_REFERENCE, REG_INST_G1,
|
|
IARG_RETURN_REGS, REG_INST_G0,
|
|
IARG_END);
|
|
INS_InsertCall(ins, IPOINT_AFTER,
|
|
AFUNPTR(copy_from_aligned_store_buffer),
|
|
IARG_REG_VALUE, REG_INST_G1,
|
|
IARG_MEMORYWRITE_SIZE,
|
|
IARG_INST_PTR,
|
|
IARG_THREAD_ID,
|
|
IARG_PTR, pthis,
|
|
IARG_END);
|
|
}
|
|
|
|
// Rewrite the memory operand (we assume there's only one) to use the address in REG_INST_G0
|
|
INS_RewriteMemoryOperand (ins, 0, REG_INST_G0);
|
|
}
|
|
|
|
|
|
// Presumption here that SSE ops do not have RMW semantics, or more than one memory operand.
|
|
static bool check_for_sse_memop(INS ins, bool& is_read, sse_aligner_t* pthis)
|
|
{
|
|
// return true if the instruction is SSEx and reads/writes memory
|
|
xed_extension_enum_t extension = static_cast<xed_extension_enum_t>(INS_Extension(ins));
|
|
if (extension == XED_EXTENSION_SSE ||
|
|
extension == XED_EXTENSION_SSE2 ||
|
|
extension == XED_EXTENSION_SSE3 ||
|
|
extension == XED_EXTENSION_SSSE3 ||
|
|
extension == XED_EXTENSION_SSE4)
|
|
{
|
|
if (pthis->realign_loads && INS_IsMemoryRead(ins))
|
|
{
|
|
is_read = true;
|
|
return true;
|
|
}
|
|
if (pthis->realign_stores && INS_IsMemoryWrite(ins))
|
|
{
|
|
is_read = false;
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static void instrument_trace(TRACE trace, sse_aligner_t* pthis)
|
|
{
|
|
//sse_aligner_t* pthis = static_cast<sse_aligner_t*>(v);
|
|
bool is_read = false;
|
|
for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl))
|
|
for (INS ins = BBL_InsHead(bbl); INS_Valid(ins); ins = INS_Next(ins))
|
|
if (check_for_sse_memop(ins, is_read, pthis))
|
|
rewrite_instruction(ins, is_read, pthis);
|
|
}
|
|
|
|
static ADDRINT copy_to_aligned_load_buffer_and_return_pointer(ADDRINT load_addr,
|
|
ADDRINT byte_len,
|
|
ADDRINT ip,
|
|
THREADID tid,
|
|
sse_aligner_t* pthis)
|
|
{
|
|
// return the address to use for the SSEx operation
|
|
thread_data_t* tdata = pthis->get_tls(tid);
|
|
ADDRINT copied = PIN_SafeCopy(tdata->read,
|
|
reinterpret_cast<uint8_t*>(load_addr),
|
|
byte_len);
|
|
if (copied != byte_len)
|
|
{
|
|
// The copy failed, this happens if the data is accessing an unmapped page
|
|
// to cause a similar fault we access the faulting data here...
|
|
(void)*(reinterpret_cast<uint8_t*>(load_addr) + copied);
|
|
}
|
|
return reinterpret_cast<ADDRINT>(tdata->read);
|
|
}
|
|
|
|
static ADDRINT return_pointer_to_aligned_store_buffer(ADDRINT store_addr,
|
|
ADDRINT ip,
|
|
THREADID tid,
|
|
sse_aligner_t* pthis,
|
|
ADDRINT *saved_ea)
|
|
{
|
|
// return the address to use for the SSEx operation
|
|
thread_data_t* tdata = pthis->get_tls(tid);
|
|
// Save the effective address, we can't ask for it at IPOINT_AFTER.
|
|
*saved_ea = store_addr;
|
|
return reinterpret_cast<ADDRINT>(tdata->write);
|
|
}
|
|
|
|
static void copy_from_aligned_store_buffer(ADDRINT store_addr,
|
|
ADDRINT byte_len,
|
|
ADDRINT ip,
|
|
THREADID tid,
|
|
sse_aligner_t* pthis)
|
|
{
|
|
thread_data_t* tdata = pthis->get_tls(tid);
|
|
ADDRINT copied = PIN_SafeCopy(reinterpret_cast<uint8_t*>(store_addr),
|
|
tdata->write,
|
|
byte_len);
|
|
|
|
if (copied != byte_len)
|
|
{
|
|
// The copy failed, this happens if the data is accessing an unmapped page.
|
|
// To cause a similar fault we access the faulting data here...
|
|
*(reinterpret_cast<uint8_t*>(store_addr) + copied) = tdata->write[copied];
|
|
}
|
|
}
|
|
|
|
}; // class
|
|
|
|
|
|
int usage()
|
|
{
|
|
cerr << "Usage: ..." << endl;
|
|
cerr << KNOB_BASE::StringKnobSummary() << endl;
|
|
return 1;
|
|
}
|
|
|
|
int main(int argc, char * argv[])
|
|
{
|
|
static sse_aligner_t aligner; // must be before usage...
|
|
|
|
PIN_InitSymbols();
|
|
if (PIN_Init(argc, argv))
|
|
return usage();
|
|
|
|
aligner.activate();
|
|
|
|
// Never returns
|
|
PIN_StartProgram();
|
|
|
|
return 0;
|
|
}
|