You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

349 lines
9.3 KiB

/*
* Copyright 2002-2019 Intel Corporation.
*
* This software is provided to you as Sample Source Code as defined in the accompanying
* End User License Agreement for the Intel(R) Software Development Products ("Agreement")
* section 1.L.
*
* This software and the related documents are provided as is, with no express or implied
* warranties, other than those that are expressly stated in the License.
*/
/* ===================================================================== */
/*! @file
* This file contains a dynamic register/memory operand pattern profiler
*/
#include "pin.H"
#include <list>
#include <iostream>
#include <cassert>
#include <iomanip>
#include <fstream>
#include <stdlib.h>
#include <string.h>
using std::showpoint;
using std::ostream;
using std::list;
using std::cerr;
using std::string;
using std::endl;
/* ===================================================================== */
/* Commandline Switches */
/* ===================================================================== */
KNOB<string> KnobOutputFile(KNOB_MODE_WRITEONCE, "pintool",
"o", "ldstmix.out", "specify profile file name");
KNOB<BOOL> KnobNoSharedLibs(KNOB_MODE_WRITEONCE, "pintool",
"no_shared_libs", "0", "do not instrument shared libraries");
KNOB<UINT32> KnobMaxThreads(KNOB_MODE_WRITEONCE, "pintool",
"threads", "100", "Maximum number of threads");
/* ===================================================================== */
/* Print Help Message */
/* ===================================================================== */
INT32 Usage()
{
cerr <<
"This pin tool computes a dynamic register/memory pattern mix profile\n"
"\n";
cerr << KNOB_BASE::StringKnobSummary();
cerr << endl;
return -1;
}
/* ===================================================================== */
/* Global Variables */
/* ===================================================================== */
UINT32 MaxNumThreads = 1;
typedef UINT64 COUNTER;
typedef enum {
PATTERN_INVALID,
PATTERN_MEM_RW,
PATTERN_MEM_R,
PATTERN_MEM_W,
PATTERN_NO_MEM,
PATTERN_NO_MEM_LIES,
PATTERN_LAST
} pattern_t;
char const *
pattern_t2str(pattern_t x)
{
switch(x) {
case PATTERN_INVALID: return "INVALID";
case PATTERN_MEM_RW: return "MEM_RW";
case PATTERN_MEM_R: return "MEM_R";
case PATTERN_MEM_W: return "MEM_W";
case PATTERN_NO_MEM: return "NO_MEM";
case PATTERN_NO_MEM_LIES: return "NO_MEM_LIES";
case PATTERN_LAST: return "LAST";
}
assert(0);
/* NOTREACHED */
return 0;
}
typedef struct
{
COUNTER pattern[PATTERN_LAST];
} STATS;
STATS GlobalStats;
class BBLSTATS
{
public:
BBLSTATS(UINT16 * stats)
: _stats(stats)
{
_counter = new COUNTER[MaxNumThreads];
memset(_counter,0,sizeof(COUNTER)*MaxNumThreads);
};
//array of uint16, one per instr in the block, 0 terminated
const UINT16 * _stats;
// one ctr per thread to avoid runtime locking at the expense of memory
COUNTER* _counter;
};
list<const BBLSTATS*> statsList;
//////////////////////////////////////////////////////////
PIN_LOCK pinLock;
UINT32 numThreads = 0;
VOID ThreadStart(THREADID threadid, CONTEXT *ctxt, INT32 flags, VOID *v)
{
PIN_GetLock(&pinLock, threadid+1);
numThreads++;
PIN_ReleaseLock(&pinLock);
ASSERT(numThreads <= MaxNumThreads, "Maximum number of threads exceeded\n");
}
/* ===================================================================== */
VOID ComputeGlobalStats()
{
for(UINT32 i=0;i<PATTERN_LAST;i++)
GlobalStats.pattern[i] = 0;
// We have the count for each bbl and its stats, compute the summary
for (list<const BBLSTATS*>::iterator bi = statsList.begin(); bi != statsList.end(); bi++)
for (const UINT16 * stats = (*bi)->_stats; *stats; stats++)
for(UINT32 thd = 0 ; thd < numThreads; thd++)
GlobalStats.pattern[*stats] += (*bi)->_counter[thd];
}
/* ===================================================================== */
/* ===================================================================== */
VOID docount(COUNTER * counter, THREADID tid)
{
counter[tid]++;
}
INT32 RecordRegisters(BBL bbl,
UINT16 * stats,
UINT32 max_stats)
{
UINT32 count = 0;
for (INS ins = BBL_InsHead(bbl); INS_Valid(ins); ins = INS_Next(ins))
{
if (count >= max_stats)
{
cerr << "Too many stats in this block" << endl;
exit(1);
}
bool rmem = INS_IsMemoryRead(ins) || INS_HasMemoryRead2(ins);
bool wmem = INS_IsMemoryWrite(ins);
bool rw_mem = rmem & wmem;
if (rw_mem)
stats[count++] = PATTERN_MEM_RW;
else if (rmem)
stats[count++] = PATTERN_MEM_R;
else if (wmem)
stats[count++] = PATTERN_MEM_W;
else if (INS_SegmentRegPrefix(ins) != REG_INVALID())
stats[count++] = PATTERN_NO_MEM_LIES;
else
stats[count++] = PATTERN_NO_MEM;
}
stats[count++] = 0;
return count;
}
/* ===================================================================== */
VOID Trace(TRACE trace, VOID *v)
{
const RTN rtn = TRACE_Rtn(trace);
if (! RTN_Valid(rtn))
return;
const SEC sec = RTN_Sec(rtn);
ASSERTX(SEC_Valid(sec));
const IMG img = SEC_Img(sec);
if (!IMG_Valid(img))
return;
if ( KnobNoSharedLibs.Value() && IMG_Type(img) == IMG_TYPE_SHAREDLIB)
return;
for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl))
{
// Record the registers into a dummy buffer so we can count them
#define MAX_STATS_PER_BLOCK (128*1024)
UINT16 buffer[MAX_STATS_PER_BLOCK];
INT32 count = RecordRegisters(bbl, buffer, MAX_STATS_PER_BLOCK);
ASSERTX(count < MAX_STATS_PER_BLOCK);
// Summarize the stats for the bbl in a 0 terminated list
// This is done at instrumentation time
UINT16 * stats = new UINT16[count];
memcpy(stats, buffer, count * sizeof(UINT16));
// Insert instrumentation to count the number of times the bbl is executed
BBLSTATS * bblstats = new BBLSTATS(stats);
INS_InsertCall(BBL_InsHead(bbl), IPOINT_BEFORE, AFUNPTR(docount),
IARG_PTR, bblstats->_counter,
IARG_THREAD_ID,
IARG_END);
// Remember the counter and stats so we can compute a summary at the end
statsList.push_back(bblstats);
}
}
/* ===================================================================== */
VOID EmitPerThreadStats(ostream* out)
{
*out << std::setprecision(4) << showpoint;
for(UINT32 thd = 0 ; thd < numThreads; thd++)
{
STATS ThreadStats;
for(UINT32 i=0;i<PATTERN_LAST;i++)
ThreadStats.pattern[i] = 0;
for (list<const BBLSTATS*>::iterator bi = statsList.begin(); bi != statsList.end(); bi++)
for (const UINT16 * stats = (*bi)->_stats; *stats; stats++)
ThreadStats.pattern[*stats] += (*bi)->_counter[thd];
COUNTER total = 0;
for (int i = PATTERN_INVALID+1; i < PATTERN_LAST; i++)
total += ThreadStats.pattern[i];
*out << "Thread " << thd << endl;
for (int i = PATTERN_INVALID+1; i < PATTERN_LAST; i++)
*out << ljstr(pattern_t2str(static_cast<pattern_t>(i)),15)
<< decstr( ThreadStats.pattern[i],12)
<< "\t"
<< std::setw(10)
<< 100.0*ThreadStats.pattern[i]/total
<< std::endl;
*out << endl;
}
}
static std::ofstream* out = 0;
VOID Fini(int, VOID * v)
{
ComputeGlobalStats();
*out <<
"#\n"
"#pattern-type count percent\n"
"#\n";
*out << "All Threads" << endl;
COUNTER total = 0;
for (int i = PATTERN_INVALID+1; i < PATTERN_LAST; i++)
total += GlobalStats.pattern[i];
*out << std::setprecision(4) << showpoint;
for (int i = PATTERN_INVALID+1; i < PATTERN_LAST; i++)
*out << ljstr(pattern_t2str(static_cast<pattern_t>(i)),15)
<< decstr( GlobalStats.pattern[i],12)
<< "\t"
<< std::setw(10)
<< 100.0*GlobalStats.pattern[i]/total
<< std::endl;
*out<< endl;
EmitPerThreadStats(out);
*out << "# eof" << endl;
out->close();
}
/* ===================================================================== */
/* Main */
/* ===================================================================== */
int main(int argc, CHAR *argv[])
{
PIN_InitSymbols();
if( PIN_Init(argc,argv) )
{
return Usage();
}
PIN_InitLock(&pinLock);
MaxNumThreads = KnobMaxThreads.Value();
out = new std::ofstream(KnobOutputFile.Value().c_str());
PIN_AddThreadStartFunction(ThreadStart, 0);
TRACE_AddInstrumentFunction(Trace, 0);
PIN_AddFiniFunction(Fini, 0);
// Never returns
PIN_StartProgram();
return 0;
}
/* ===================================================================== */
/* eof */
/* ===================================================================== */