/* * Copyright 2002-2019 Intel Corporation. * * This software is provided to you as Sample Source Code as defined in the accompanying * End User License Agreement for the Intel(R) Software Development Products ("Agreement") * section 1.L. * * This software and the related documents are provided as is, with no express or implied * warranties, other than those that are expressly stated in the License. */ /* ===================================================================== */ /*! @file This file contains a static and dynamic opcode/ISA extension/ISA * category mix profiler * * This is derived from mix.cpp. Handles an arbitrary number of threads * using TLS for data storage and avoids locking, except during I/O. */ #include #include #include #include #include #include #include #include #include "pin.H" #include "instlib.H" using namespace INSTLIB; // key for accessing TLS storage in the threads. initialized once in main() static TLS_KEY tls_key; typedef UINT32 stat_index_t; #if defined(TARGET_IA32) || defined(TARGET_IA32E) static string disassemble(UINT64 start, UINT64 stop); #endif /* ===================================================================== */ /* Commandline Switches */ /* ===================================================================== */ KNOB KnobOutputFile(KNOB_MODE_WRITEONCE, "pintool", "o", "mix.out", "specify profile file name"); KNOB KnobTopBlocks(KNOB_MODE_WRITEONCE, "pintool", "top_blocks", "20", "specify a maximal number of top blocks for which icounts are printed"); #if defined(TARGET_IA32) || defined(TARGET_IA32E) KNOB KnobShowDisassembly(KNOB_MODE_WRITEONCE, "pintool", "disas", "0", "Show disassembly for top blocks"); #endif KNOB KnobPid(KNOB_MODE_WRITEONCE, "pintool", "i", "0", "append pid to output"); KNOB KnobProfilePredicated(KNOB_MODE_WRITEONCE, "pintool", "p", "0", "enable accurate profiling for predicated instructions"); KNOB KnobProfileStaticOnly(KNOB_MODE_WRITEONCE, "pintool", "s", "0", "terminate after collection of static 
profile for main image"); #ifndef TARGET_WINDOWS KNOB KnobProfileDynamicOnly(KNOB_MODE_WRITEONCE, "pintool", "d", "0", "Only collect dynamic profile"); #else KNOB KnobProfileDynamicOnly(KNOB_MODE_WRITEONCE, "pintool", "d", "1", "Only collect dynamic profile"); #endif KNOB KnobNoSharedLibs(KNOB_MODE_WRITEONCE, "pintool", "no_shared_libs", "0", "do not instrument shared libraries"); KNOB KnobInstructionLengthMix(KNOB_MODE_WRITEONCE, "pintool","ilen", "0", "Compute instruction length mix"); KNOB KnobCategoryMix(KNOB_MODE_WRITEONCE, "pintool", "category", "0", "Compute ISA category mix"); KNOB KnobIformMix(KNOB_MODE_WRITEONCE, "pintool", "iform", "0", "Compute ISA iform mix"); KNOB KnobMapToFile(KNOB_MODE_WRITEONCE, "pintool", "mapaddr", "0", "Map Addresses to File/Line information"); typedef enum { measure_opcode=0, measure_category=1, measure_ilen=2, measure_iform=3 } measurement_t; measurement_t measurement = measure_opcode; /* ===================================================================== */ INT32 Usage() { cerr << "This pin tool computes a static and dynamic opcode, " << "instruction form, instruction length, extension or category mix profile\n\n"; cerr << KNOB_BASE::StringKnobSummary(); cerr << endl; cerr << "The default is to do opcode and ISA extension profileing" << endl; cerr << "At most one of -iform, -ilen or -category is allowed" << endl; cerr << endl; return -1; } /* ===================================================================== */ /* INDEX HELPERS */ /* ===================================================================== */ const UINT32 INDEX_SPECIAL = 3000; const UINT32 MAX_MEM_SIZE = 520; const UINT32 MAX_EXTENSION = 50; const UINT32 INDEX_TOTAL = INDEX_SPECIAL + 0; const UINT32 INDEX_MEM_ATOMIC = INDEX_SPECIAL + 1; const UINT32 INDEX_STACK_READ = INDEX_SPECIAL + 2; const UINT32 INDEX_STACK_WRITE = INDEX_SPECIAL + 3; const UINT32 INDEX_IPREL_READ = INDEX_SPECIAL + 4; const UINT32 INDEX_IPREL_WRITE = INDEX_SPECIAL + 5; const UINT32 
INDEX_MEM_READ_SIZE = INDEX_SPECIAL + 6; const UINT32 INDEX_MEM_WRITE_SIZE = INDEX_SPECIAL + 6 + MAX_MEM_SIZE; const UINT32 INDEX_EXTENSION = INDEX_SPECIAL + 6 + 2*MAX_MEM_SIZE; const UINT32 INDEX_FMA_BASE = INDEX_EXTENSION + MAX_EXTENSION; const UINT32 INDEX_FMA = INDEX_FMA_BASE + 1; const UINT32 INDEX_FMA_ADD = INDEX_FMA_BASE + 2; const UINT32 INDEX_FMA_MUL = INDEX_FMA_BASE + 3; const UINT32 INDEX_FMA_S = INDEX_FMA_BASE + 4; const UINT32 INDEX_FMA_S_ADD = INDEX_FMA_BASE + 5; // NOTE: skipped 6. does not matter const UINT32 INDEX_FMA_S_MUL = INDEX_FMA_BASE + 7; const UINT32 INDEX_FMA_D = INDEX_FMA_BASE + 8; const UINT32 INDEX_FMA_D_ADD = INDEX_FMA_BASE + 9; const UINT32 INDEX_FMA_D_MUL = INDEX_FMA_BASE + 10; const UINT32 INDEX_FPMA = INDEX_FMA_BASE + 11; const UINT32 INDEX_FPMA_ADD = INDEX_FMA_BASE + 12; const UINT32 INDEX_FPMA_MUL = INDEX_FMA_BASE + 13; const UINT32 INDEX_FMS = INDEX_FMA_BASE + 14; const UINT32 INDEX_FMS_SUB = INDEX_FMA_BASE + 15; const UINT32 INDEX_FMS_MUL = INDEX_FMA_BASE + 16; const UINT32 INDEX_FMS_S = INDEX_FMA_BASE + 17; const UINT32 INDEX_FMS_S_SUB = INDEX_FMA_BASE + 18; const UINT32 INDEX_FMS_S_MUL = INDEX_FMA_BASE + 19; const UINT32 INDEX_FMS_D = INDEX_FMA_BASE + 20; const UINT32 INDEX_FMS_D_SUB = INDEX_FMA_BASE + 21; const UINT32 INDEX_FMS_D_MUL = INDEX_FMA_BASE + 22; const UINT32 INDEX_FPMS = INDEX_FMA_BASE + 23; const UINT32 INDEX_FPMS_SUB = INDEX_FMA_BASE + 24; const UINT32 INDEX_FPMS_MUL = INDEX_FMA_BASE + 25; const UINT32 INDEX_FNMA = INDEX_FMA_BASE + 26; const UINT32 INDEX_FNMA_ADD = INDEX_FMA_BASE + 27; const UINT32 INDEX_FNMA_MUL = INDEX_FMA_BASE + 28; const UINT32 INDEX_FNMA_S = INDEX_FMA_BASE + 29; const UINT32 INDEX_FNMA_S_ADD = INDEX_FMA_BASE + 30; const UINT32 INDEX_FNMA_S_MUL = INDEX_FMA_BASE + 31; const UINT32 INDEX_FNMA_D = INDEX_FMA_BASE + 32; const UINT32 INDEX_FNMA_D_ADD = INDEX_FMA_BASE + 33; const UINT32 INDEX_FNMA_D_MUL = INDEX_FMA_BASE + 34; const UINT32 INDEX_FPNMA = INDEX_FMA_BASE + 35; const UINT32 
INDEX_FPNMA_ADD = INDEX_FMA_BASE + 36; const UINT32 INDEX_FPNMA_MUL = INDEX_FMA_BASE + 37; const UINT32 INDEX_SPECIAL_END = INDEX_FMA_BASE + 38; BOOL IsMemReadIndex(UINT32 i) { return (INDEX_MEM_READ_SIZE <= i && i < INDEX_MEM_READ_SIZE + MAX_MEM_SIZE ); } BOOL IsMemWriteIndex(UINT32 i) { return (INDEX_MEM_WRITE_SIZE <= i && i < INDEX_MEM_WRITE_SIZE + MAX_MEM_SIZE ); } /* ===================================================================== */ LOCALFUN UINT32 INS_GetIndex(INS ins) { UINT32 index = 0; switch(measurement) { case measure_opcode: index = INS_Opcode(ins); break; case measure_ilen: index = INS_Size(ins); break; case measure_category: index = INS_Category(ins); break; case measure_iform: { #if defined(TARGET_IA32) || defined(TARGET_IA32E) xed_decoded_inst_t* xedd = INS_XedDec(ins); xed_iform_enum_t iform = xed_decoded_inst_get_iform_enum(xedd); index = static_cast(iform); #endif } break; } return index; } /* ===================================================================== */ LOCALFUN BOOL INS_IsFMA(INS ins) { return FALSE; } /* ===================================================================== */ LOCALFUN UINT32 IndexStringLength(BBL bbl, BOOL memory_access_profile) { UINT32 count = 0; for (INS ins = BBL_InsHead(bbl); INS_Valid(ins); ins = INS_Next(ins)) { count++; // one for the ins if (measurement != measure_iform) count++; // one for the ISA extension. if( measurement == measure_opcode && memory_access_profile ) { if( INS_IsMemoryRead(ins) ) count++; // for size if( INS_IsStackRead(ins) ) count++; if( INS_IsIpRelRead(ins) ) count++; if( INS_IsMemoryWrite(ins) ) count++; // for size if( INS_IsStackWrite(ins) ) count++; if( INS_IsIpRelWrite(ins) ) count++; if( INS_IsAtomicUpdate(ins) ) count++; if( INS_IsFMA(ins) ) count++; } } return count; } /* ===================================================================== */ LOCALFUN UINT32 MemsizeToIndex(UINT32 size, BOOL write) { return (write ? 
INDEX_MEM_WRITE_SIZE : INDEX_MEM_READ_SIZE ) + size; } LOCALFUN stat_index_t *INS_GenerateIndexFMA(INS ins, stat_index_t *stats) { return stats; } /* ===================================================================== */ LOCALFUN stat_index_t* INS_GenerateIndexString(INS ins, stat_index_t *stats, BOOL memory_access_profile) { *stats++ = INS_GetIndex(ins); if (measurement != measure_iform) *stats++ = INS_Extension(ins) + INDEX_EXTENSION; if( measurement == measure_opcode && memory_access_profile ) { if( INS_IsMemoryRead(ins) ) *stats++ = MemsizeToIndex( INS_MemoryReadSize(ins), 0 ); if( INS_IsMemoryWrite(ins) ) *stats++ = MemsizeToIndex( INS_MemoryWriteSize(ins), 1 ); if( INS_IsAtomicUpdate(ins) ) *stats++ = INDEX_MEM_ATOMIC; if( INS_IsStackRead(ins) ) *stats++ = INDEX_STACK_READ; if( INS_IsStackWrite(ins) ) *stats++ = INDEX_STACK_WRITE; if( INS_IsIpRelRead(ins) ) *stats++ = INDEX_IPREL_READ; if( INS_IsIpRelWrite(ins) ) *stats++ = INDEX_IPREL_WRITE; } return stats; } /* ===================================================================== */ LOCALFUN string IndexToString( UINT32 index ) { if (measurement == measure_iform) { #if defined(TARGET_IA32) || defined(TARGET_IA32E) return xed_iform_enum_t2str(static_cast(index)); #else return "???"; #endif } if( INDEX_SPECIAL <= index && index < INDEX_SPECIAL_END) { if( index == INDEX_TOTAL ) return "*total"; else if( IsMemReadIndex(index) ) return "*mem-read-" + decstr( index - INDEX_MEM_READ_SIZE ); else if( IsMemWriteIndex(index)) return "*mem-write-" + decstr( index - INDEX_MEM_WRITE_SIZE ); else if( index == INDEX_MEM_ATOMIC ) return "*mem-atomic"; else if( index == INDEX_STACK_READ ) return "*stack-read"; else if( index == INDEX_STACK_WRITE ) return "*stack-write"; else if( index == INDEX_IPREL_READ ) return "*iprel-read"; else if( index == INDEX_IPREL_WRITE ) return "*iprel-write"; else if (index >= INDEX_EXTENSION && index < INDEX_EXTENSION + MAX_EXTENSION) return "*isa-ext-" + EXTENSION_StringShort(index - 
INDEX_EXTENSION); else if ( index == INDEX_FMA ) return "*FMA"; else if ( index == INDEX_FMA_ADD ) return "*FMA_ADD"; else if ( index == INDEX_FMA_MUL ) return "*FMA_MUL"; else if ( index == INDEX_FMA_S ) return "*FMA_S"; else if ( index == INDEX_FMA_S_ADD ) return "*FMA_S_ADD"; else if ( index == INDEX_FMA_S_MUL ) return "*FMA_S_MUL"; else if ( index == INDEX_FMA_D ) return "*FMA_D"; else if ( index == INDEX_FMA_D_ADD ) return "*FMA_D_ADD"; else if ( index == INDEX_FMA_D_MUL ) return "*FMA_D_MUL"; else if ( index == INDEX_FPMA ) return "*FPMA"; else if ( index == INDEX_FPMA_ADD ) return "*FPMA_ADD"; else if ( index == INDEX_FPMA_MUL ) return "*FPMA_MUL"; else if ( index == INDEX_FMS ) return "*FMS"; else if ( index == INDEX_FMS_SUB ) return "*FMS_SUB"; else if ( index == INDEX_FMS_MUL ) return "*FMS_MUL"; else if ( index == INDEX_FMS_S ) return "*FMS_S"; else if ( index == INDEX_FMS_S_SUB ) return "*FMS_S_SUB"; else if ( index == INDEX_FMS_S_MUL ) return "*FMS_S_MUL"; else if ( index == INDEX_FMS_D ) return "*FMS_D"; else if ( index == INDEX_FMS_D_SUB ) return "*FMS_D_SUB"; else if ( index == INDEX_FMS_D_MUL ) return "*FMS_D_MUL"; else if ( index == INDEX_FPMS ) return "*FPMS"; else if ( index == INDEX_FPMS_SUB ) return "*FPMS_SUB"; else if ( index == INDEX_FPMS_MUL ) return "*FPMS_MUL"; else if ( index == INDEX_FNMA ) return "*FNMA"; else if ( index == INDEX_FNMA_ADD ) return "*FNMA_ADD"; else if ( index == INDEX_FNMA_MUL ) return "*FNMA_MUL"; else if ( index == INDEX_FNMA_S ) return "*FNMA_S"; else if ( index == INDEX_FNMA_S_ADD ) return "*FNMA_S_ADD"; else if ( index == INDEX_FNMA_S_MUL ) return "*FNMA_S_MUL"; else if ( index == INDEX_FNMA_D ) return "*FNMA_D"; else if ( index == INDEX_FNMA_D_ADD ) return "*FNMA_D_ADD"; else if ( index == INDEX_FNMA_D_MUL ) return "*FNMA_D_MUL"; else if ( index == INDEX_FPNMA ) return "*FPNMA"; else if ( index == INDEX_FPNMA_ADD ) return "*FPNMA_ADD"; else if ( index == INDEX_FPNMA_MUL ) return "*FPNMA_MUL"; else { ASSERTX(0); 
return ""; } } else if (measurement == measure_ilen) { ostringstream s; s << "ILEN-" << index; return s.str(); } else if (measurement == measure_opcode) { return OPCODE_StringShort(index); } else if (measurement == measure_category) { return CATEGORY_StringShort(index); } ASSERTX(0); return ""; } /* ===================================================================== */ /* ===================================================================== */ typedef UINT64 COUNTER; /* zero initialized */ typedef map stat_map_t; class CSTATS { public: CSTATS() { clear(); } stat_map_t unpredicated; stat_map_t predicated; stat_map_t predicated_true; VOID clear() { unpredicated.erase(unpredicated.begin(),unpredicated.end()); predicated.erase(predicated.begin(),predicated.end()); predicated_true.erase(predicated_true.begin(),predicated_true.end()); } }; class BBL_SORT_STATS { public: ADDRINT _pc; UINT64 _icount; UINT64 _executions; UINT64 _nbytes; }; CSTATS GlobalStatsStatic; // summary stats for static analysis class BBLSTATS { // Our first pass sets up the types of stats we need to update for this // block. We have one stat per instruction in the block. The _stats // array is null terminated. public: const stat_index_t* const _stats; const ADDRINT _pc; // start PC of the block const UINT32 _ninst; // # of instructions const UINT32 _nbytes; // # of bytes in the block BBLSTATS(stat_index_t* stats, ADDRINT pc, UINT32 ninst, UINT32 nbytes) : _stats(stats), _pc(pc), _ninst(ninst), _nbytes(nbytes) { }; }; LOCALVAR vector statsList; /* ===================================================================== */ #if defined(__GNUC__) # if defined(TARGET_MAC) || defined(TARGET_WINDOWS) // macOS* XCODE2.4.1 gcc and Cgywin gcc 3.4.x only allow for 16b // alignment! So we need to pad! 
# define ALIGN_LOCK __attribute__ ((aligned(16))) # define NEED_TO_PAD # else # define ALIGN_LOCK __attribute__ ((aligned(64))) # endif #else # define ALIGN_LOCK __declspec(align(64)) #endif #if defined(NEED_TO_PAD) LOCALVAR char pad0[48]; #endif LOCALVAR PIN_LOCK ALIGN_LOCK pinLock; #if defined(NEED_TO_PAD) LOCALVAR char pad1[48]; #endif LOCALVAR PIN_LOCK ALIGN_LOCK bbl_list_lock; #if defined(NEED_TO_PAD) LOCALVAR char pad2[48]; #endif static std::ofstream* out; class thread_data_t { public: thread_data_t() : enabled(0) { } CSTATS cstats; UINT32 enabled; vector block_counts; UINT32 size() { UINT32 limit; limit = block_counts.size(); return limit; } void resize(UINT32 n) { if (size() < n) block_counts.resize(2*n); } }; thread_data_t* get_tls(THREADID tid) { thread_data_t* tdata = static_cast(PIN_GetThreadData(tls_key, tid)); return tdata; } VOID activate_counting(THREADID tid) { thread_data_t* tdata = get_tls(tid); tdata->enabled = 1; } VOID deactivate_counting(THREADID tid) { thread_data_t* tdata = get_tls(tid); tdata->enabled = 0; } UINT32 numThreads = 0; VOID ThreadStart(THREADID tid, CONTEXT *ctxt, INT32 flags, VOID *v) { // This function is locked no need for a Pin Lock here numThreads++; PIN_GetLock(&pinLock, tid+1); // for output *out << "# Starting tid " << tid << endl; PIN_ReleaseLock(&pinLock); thread_data_t* tdata = new thread_data_t; // remember my pointer for later PIN_SetThreadData(tls_key, tdata, tid); // make sure the thread is counting stuff. // FIXME: The controller should start all threads if no trigger // conditions are specified, but currently it only starts // TID0. Starting here is wrong if the controller has a nontrivial // starting condition, but this is what most people want. They can // always stop the controller and zero the stats using markers as a // workaround. 
if (tid) activate_counting(tid); } VOID emit_stats(THREADID tid); //forward prototype VOID emit_pc_stats(THREADID tid); //forward prototype VOID zero_stats(THREADID tid); //forward prototype VOID emit_bbl_stats_sorted(THREADID tid); LOCALVAR CONTROL_MANAGER control; LOCALFUN VOID Handler(EVENT_TYPE ev, VOID *val, CONTEXT *ctxt, VOID *ip, THREADID tid, bool bcast) { switch(ev) { case EVENT_START: PIN_GetLock(&pinLock, tid+1); // for output *out << "# Start counting for tid " << tid << endl; PIN_ReleaseLock(&pinLock); activate_counting(tid); break; case EVENT_STOP: PIN_GetLock(&pinLock, tid+1); // for output *out << "# Stop counting for tid " << tid << endl; if (control.PinPointsActive()) { UINT32 pp = control.CurrentPp(tid); UINT32 phase = control.CurrentPhase(tid); *out << "# PinPointNumber " << pp << endl; *out << "# PinPointPhase " << phase << endl; } PIN_ReleaseLock(&pinLock); deactivate_counting(tid); if (control.PinPointsActive()) { // when doing pinpoints "mixes" we want to emit and then zero the stats when we stop a region. 
emit_stats(tid); emit_bbl_stats_sorted(tid); zero_stats(tid); } break; case CONTROL_STATS_EMIT: PIN_GetLock(&pinLock, tid+1); // for output *out << "# Emit stats for tid " << tid << endl; PIN_ReleaseLock(&pinLock); emit_stats(tid); break; case CONTROL_STATS_RESET: PIN_GetLock(&pinLock, tid+1); // for output *out << "# Reset stats for tid " << tid << endl; PIN_ReleaseLock(&pinLock); zero_stats(tid); break; default: ASSERTX(false); } } /* ===================================================================== */ VOID validate_bbl_count(THREADID tid, ADDRINT block_count_for_trace) { thread_data_t* tdata = get_tls(tid); tdata->resize(block_count_for_trace+1); } VOID PIN_FAST_ANALYSIS_CALL docount_bbl(ADDRINT block_id, THREADID tid) { thread_data_t* tdata = get_tls(tid); //ASSERTX(tdata->size() > block_id); tdata->block_counts[block_id] += tdata->enabled; } VOID docount_predicated_true(UINT32 index, THREADID tid) { thread_data_t* tdata = get_tls(tid); if (tdata->enabled) { stat_map_t::iterator i = tdata->cstats.predicated_true.find(index); if (i == tdata->cstats.predicated_true.end()) tdata->cstats.predicated_true[index] = 1; else i->second += 1; } } /* ===================================================================== */ VOID zero_stats(THREADID tid) { thread_data_t* tdata = get_tls(tid); tdata->cstats.clear(); UINT32 limit = tdata->size(); for(UINT32 i=0;i< limit;i++) tdata->block_counts[i]=0; } /* ===================================================================== */ VOID CheckForSpecialMarkers(INS ins, ADDRINT pc, unsigned int instruction_size) { // This checks for single instances of special 3B NOPs. // 0F1FF3 - start // 0F1FF4 - stop // 0F1FF5 - emit stats // 0F1FF6 - zero stats // FIXME: if there are collisions with existing instructions, we can // change them here. //FIXME: Ideally this would be integrated in to the control.H so file //so that anything can use it. 
if (instruction_size != 3) return; UINT8* pc_ptr = reinterpret_cast(pc); if (pc_ptr[0] == 0x0F && pc_ptr[1] == 0x1F) { switch(pc_ptr[2]) { case 0xF3: // start INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)activate_counting, IARG_THREAD_ID, IARG_END); break; case 0xF4: // stop INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)deactivate_counting, IARG_THREAD_ID, IARG_END); break; case 0xF5: // emit INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)emit_stats, IARG_THREAD_ID, IARG_END); break; case 0xF6: // zero INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)zero_stats, IARG_THREAD_ID, IARG_END); break; default: break; } } } /* ===================================================================== */ VOID Trace(TRACE trace, VOID *v) { static UINT32 basic_blocks = 0; const BOOL accurate_handling_of_predicates = KnobProfilePredicated.Value(); ADDRINT pc = TRACE_Address(trace); ADDRINT start_pc = pc; UINT32 new_blocks = 0; for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) { const INS head = BBL_InsHead(bbl); if (! INS_Valid(head)) continue; new_blocks++; } TRACE_InsertCall(trace, IPOINT_BEFORE, AFUNPTR(validate_bbl_count), IARG_THREAD_ID, IARG_UINT32, basic_blocks+new_blocks, IARG_END); for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) { const INS head = BBL_InsHead(bbl); if (! INS_Valid(head)) continue; // Summarize the stats for the bbl in a 0 terminated list // This is done at instrumentation time const UINT32 n = IndexStringLength(bbl, 1); // stats is an array of index types. We later multiply it by the // dynamic count for a block. 
stat_index_t *const stats = new stat_index_t[ n + 1]; stat_index_t *const stats_end = stats + (n + 1); stat_index_t *curr = stats; UINT32 ninsts = 0; for (INS ins = head; INS_Valid(ins); ins = INS_Next(ins)) { unsigned int instruction_size = INS_Size(ins); // This checks for x86-specific opcodes CheckForSpecialMarkers(ins, pc, instruction_size); // Count the number of times a predicated instruction is actually executed // this is expensive and hence disabled by default if( INS_IsPredicated(ins) && accurate_handling_of_predicates ) { INS_InsertPredicatedCall(ins, IPOINT_BEFORE, AFUNPTR(docount_predicated_true), IARG_UINT32, INS_GetIndex(ins), IARG_THREAD_ID, IARG_END); } if (KnobMapToFile) { INT32 line; string filename; PIN_GetSourceLocation(pc, NULL, &line, &filename); if (!filename.empty()) *out << "MAPADDR 0x" << hex << pc << " " << dec << line << " " << filename << endl; } curr = INS_GenerateIndexString(ins,curr,1); if (measurement == measure_opcode) curr = INS_GenerateIndexFMA(ins,curr); pc = pc + instruction_size; ninsts++; } // stats terminator *curr++ = 0; ASSERTX( curr == stats_end ); // Insert instrumentation to count the number of times the bbl is executed BBLSTATS * bblstats = new BBLSTATS(stats, start_pc, ninsts, pc-start_pc); INS_InsertCall(head, IPOINT_BEFORE, AFUNPTR(docount_bbl), IARG_FAST_ANALYSIS_CALL, IARG_UINT32, basic_blocks, IARG_THREAD_ID, IARG_END); // Remember the counter and stats so we can compute a summary at the end basic_blocks++; PIN_GetLock(&bbl_list_lock,1); statsList.push_back(bblstats); PIN_ReleaseLock(&bbl_list_lock); } } /* ===================================================================== */ VOID DumpStats(ofstream& out, CSTATS& stats, BOOL predicated_true, const string& title, THREADID tid) { out << "#\n# " << title << "\n#\n"; if (tid != INVALID_THREADID) out << "# TID " << tid << "\n"; out << "# "; if (measurement == measure_opcode) out << "opcode"; else if (measurement == measure_ilen) out << "inslen"; else if 
(measurement == measure_category) out << "catgry"; else if (measurement == measure_iform) out << "iform "; out<< " count-unpredicated count-predicated"; if( predicated_true ) out << " count-predicated-true"; out << "\n#\n"; // Compute the "total" bin. Stop at the INDEX_TOTAL for all histograms // except the iform. Iforms donot use the special rows, so we count everything. // build a map of the valid stats index values for all 3 tables. map m; UINT32 tu=0, tp=0, tpt=0; for(stat_map_t::iterator it = stats.unpredicated.begin() ; it != stats.unpredicated.end() ; it++) { if (measurement == measure_iform || it->first < INDEX_TOTAL) tu += it->second; m[it->first]=true; } for(stat_map_t::iterator it = stats.predicated.begin() ; it != stats.predicated.end() ; it++) { if (measurement == measure_iform || it->first < INDEX_TOTAL) tp += it->second; m[it->first]=true; } for(stat_map_t::iterator it=stats.predicated_true.begin();it != stats.predicated_true.end() ; it++) { if (measurement == measure_iform || it->first < INDEX_TOTAL) tpt += it->second; m[it->first]=true; } for(map::iterator it = m.begin(); it != m.end(); it++) { stat_map_t::iterator s; COUNTER up=0,pr=0,prt=0; UINT32 indx = it->first; s = stats.unpredicated.find(indx); if (s != stats.unpredicated.end()) up = s->second; s = stats.predicated.find(indx); if (s != stats.predicated.end()) pr = s->second; if (up == 0 && pr == 0) continue; out << setw(6) << indx << " " << ljstr(IndexToString(indx),25) << " " << setw(16) << up << " " << setw(16) << pr; if( predicated_true ) { s = stats.predicated_true.find(indx); prt = 0; if (s != stats.predicated_true.end()) prt = s->second; out << " " << setw(16) << prt; } out << endl; } // print the totals out << setw(6) << "000000" << " " << ljstr("*total",25) << " " << setw(16) << tu << " " << setw(16) << tp; if( predicated_true ) out << " " << setw(16) << tpt; out << endl; } /* ===================================================================== */ static UINT32 stat_dump_count = 0; 
VOID emit_bbl_stats(THREADID tid) { thread_data_t* tdata = get_tls(tid); // dynamic Counts // Need to lock here because we might be resize (and thus reallocing) // the statsList when we do a push_back in the instrumentation. PIN_GetLock(&bbl_list_lock,tid+1); UINT32 limit = tdata->size(); if ( limit > statsList.size() ) limit = statsList.size(); for(UINT32 i=0;i< limit ; i++) { UINT32 bcount = tdata->block_counts[i]; BBLSTATS* b = statsList[i]; if (b && b->_stats) for (const stat_index_t* stats = b->_stats; *stats; stats++) tdata->cstats.unpredicated[*stats] += bcount; } PIN_ReleaseLock(&bbl_list_lock); PIN_GetLock(&pinLock, tid+1); // for output stat_dump_count++; *out << "# EMIT_STATS " << stat_dump_count << endl; DumpStats(*out, tdata->cstats, KnobProfilePredicated, "$dynamic-counts",tid); *out << "# END_STATS" << endl; PIN_ReleaseLock(&pinLock); } int qsort_compare_fn(const void *a, const void *b) { const BBL_SORT_STATS* ba = static_cast(a); const BBL_SORT_STATS* bb = static_cast(b); return (bb->_icount - ba->_icount); // descending sort } VOID emit_bbl_stats_sorted(THREADID tid) { thread_data_t* tdata = get_tls(tid); // dynamic Counts // Need to lock here because we might be resize (and thus reallocing) // the statsList when we do a push_back in the instrumentation. 
// NOTE(review): this line continues emit_bbl_stats_sorted (ranking a thread's blocks by dynamic instruction count); part of its text is missing further along, see the inline note.
PIN_GetLock(&bbl_list_lock,tid+1); UINT32 limit = tdata->size(); if ( limit > statsList.size() ) limit = statsList.size(); /* NOTE(review): the array below is default-initialized; an entry whose statsList slot is null is never filled in by the loop but is still passed to qsort */ BBL_SORT_STATS* icounts = new BBL_SORT_STATS[limit]; UINT64 thread_total = 0; for(UINT32 i=0;i< limit ; i++) { BBLSTATS* b = statsList[i]; if (b) { UINT32 bcount = tdata->block_counts[i]; /* NOTE(review): bcount and _ninst are both UINT32, so the product is formed in UINT32 before it is stored in the UINT64 _icount */ icounts[i]._icount = bcount * b->_ninst; icounts[i]._pc = b->_pc; icounts[i]._executions = bcount; icounts[i]._nbytes = b->_nbytes; thread_total += icounts[i]._icount; } } PIN_ReleaseLock(&bbl_list_lock); qsort(icounts, limit, sizeof(BBL_SORT_STATS), qsort_compare_fn); PIN_GetLock(&pinLock, tid+1); // for output *out << "# EMIT_STATS TOP BLOCKS " << stat_dump_count << " FOR TID " << tid << endl; if (limit > KnobTopBlocks.Value()) limit = KnobTopBlocks.Value(); UINT64 t =0; /* NOTE(review): text is missing from this copy of the file between "i" and "size()" just below -- the rest of this loop, the end of emit_bbl_stats_sorted and (presumably) the opening of emit_pc_stats cannot be read from what remains */ for(UINT32 i=0;isize(); if ( limit > statsList.size() ) limit = statsList.size(); for(UINT32 i=0;i< limit ; i++) { UINT32 bcount = tdata->block_counts[i]; BBLSTATS* b = statsList[i]; if (bcount && b && b->_stats) *out << "BLOCKCOUNT 0x" << hex << b->_pc << " " << dec << (bcount * b->_ninst ) << endl; } PIN_ReleaseLock(&bbl_list_lock); *out << "# END_EMIT_PC_STATS for TID " << tid << endl; PIN_ReleaseLock(&pinLock); } /* emit_stats: writes the histogram table for tid and, when -mapaddr is set, the per-block BLOCKCOUNT lines as well */ VOID emit_stats(THREADID tid) { emit_bbl_stats(tid); if (KnobMapToFile) emit_pc_stats(tid); } /* ===================================================================== */ void combine_dynamic_stats(unsigned int numThreads) { // combine all the rows from each thread in to the total variable. 
// NOTE(review): this line holds the body of combine_dynamic_stats, then Fini and the hex-printing helpers used by disassemble; text is missing at two points, see the inline notes.
CSTATS total; /* NOTE(review): text is missing between "i" and "cstats" in the loop header below; what remains shows each of a thread's three histograms being added row by row into 'total' */ for (THREADID i=0;icstats.unpredicated.begin(); it != tdata->cstats.unpredicated.end() ; it++) { stat_map_t::iterator x = total.unpredicated.find(it->first); if (x == total.unpredicated.end()) total.unpredicated[it->first] = it->second; else x->second += it->second; } for(stat_map_t::iterator it = tdata->cstats.predicated.begin(); it != tdata->cstats.predicated.end() ; it++) { stat_map_t::iterator x = total.predicated.find(it->first); if (x == total.predicated.end()) total.predicated[it->first] = it->second; else x->second += it->second; } for(stat_map_t::iterator it = tdata->cstats.predicated_true.begin(); it != tdata->cstats.predicated_true.end() ; it++) { stat_map_t::iterator x = total.predicated_true.find(it->first); if (x == total.predicated_true.end()) total.predicated_true[it->first] = it->second; else x->second += it->second; } } *out << "# EMIT_GLOBAL_DYNAMIC_STATS " << stat_dump_count << endl; DumpStats(*out, total, false, "$global-dynamic-counts",INVALID_THREADID); *out << endl << "# END_GLOBAL_DYNAMIC_STATS" << endl; } VOID Fini(int, VOID * v) // only runs once for the application { *out << "# FINI: end of program" << endl; /* NOTE(review): text is missing between "i" and "close()" below; the body of this loop and whatever follows it up to the stream being closed cannot be read from what remains */ for(unsigned int i=0;iclose(); } /* ===================================================================== */ #if defined(TARGET_IA32) || defined(TARGET_IA32E) ///////////////////////////////////////////////////////////////////////// // Add a disassembler ///////////////////////////////////////////////////////////////////////// /* nibble_to_ascii_hex: upper-case hex digit for a value 0..15, '?' for anything larger */ static char nibble_to_ascii_hex(UINT8 i) { if (i<10) return i+'0'; if (i<16) return i-10+'A'; return '?'; } /* print_hex_line: writes two hex digits per byte of 'array' into 'buf' followed by a terminating 0; a length of 0 means XED_MAX_INSTRUCTION_BYTES bytes; buf must have room for 2*n+1 chars */ static void print_hex_line(char* buf, const UINT8* array, const int length) { int n = length; int i=0; if (length == 0) n = XED_MAX_INSTRUCTION_BYTES; for( i=0 ; i< n; i++) { buf[2*i+0] = nibble_to_ascii_hex(array[i]>>4); buf[2*i+1] = nibble_to_ascii_hex(array[i]&0xF); } buf[2*i]=0; } static string disassemble(UINT64 start, UINT64 stop) { UINT64 pc = start; xed_state_t dstate; 
xed_syntax_enum_t syntax = XED_SYNTAX_INTEL; xed_error_enum_t xed_error; xed_decoded_inst_t xedd; ostringstream os; if (sizeof(ADDRINT) == 4) xed_state_init(&dstate, XED_MACHINE_MODE_LEGACY_32, XED_ADDRESS_WIDTH_32b, XED_ADDRESS_WIDTH_32b); else xed_state_init(&dstate, XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b, XED_ADDRESS_WIDTH_64b); while( pc < stop ) { xed_decoded_inst_zero_set_mode(&xedd, &dstate); UINT32 len = 15; if (stop - pc < 15) len = stop-pc; xed_error = xed_decode(&xedd, reinterpret_cast(pc), len); bool okay = (xed_error == XED_ERROR_NONE); iostream::fmtflags fmt = os.flags(); os << std::setfill('0') << "XDIS " << std::hex << std::setw(sizeof(ADDRINT)*2) << pc << std::dec << ": " << std::setfill(' ') << std::setw(4); if (okay) { char buffer[200]; unsigned int dec_len, sp; os << xed_extension_enum_t2str(xed_decoded_inst_get_extension(&xedd)); dec_len = xed_decoded_inst_get_length(&xedd); print_hex_line(buffer, reinterpret_cast(pc), dec_len); os << " " << buffer; for ( sp=dec_len; sp < 12; sp++) // pad out the instruction bytes os << " "; os << " "; memset(buffer,0,200); int dis_okay = xed_format_context(syntax, &xedd, buffer, 200, pc, 0, 0); if (dis_okay) os << buffer << endl; else os << "Error disasassembling pc 0x" << std::hex << pc << std::dec; pc += dec_len; } else { // print the byte and keep going. UINT8 memval = *reinterpret_cast(pc); os << "???? " // no extension << std::hex << std::setw(2) << std::setfill('0') << static_cast(memval) << std::endl; pc += 1; } os.flags(fmt); } return os.str(); } #endif /* ===================================================================== */ int main(int argc, CHAR **argv) { if( PIN_Init(argc,argv) ) return Usage(); PIN_InitLock(&pinLock); PIN_InitLock(&bbl_list_lock); // obtain a key for TLS storage tls_key = PIN_CreateThreadDataKey(0); string filename = KnobOutputFile.Value(); if (KnobPid) { filename += "." 
+ decstr(getpid()); } out = new std::ofstream(filename.c_str()); control.CheckKnobs(Handler, 0); // make sure that exactly one thing-to-count knob is specified. if (KnobInstructionLengthMix.Value() && KnobCategoryMix.Value()) { cerr << "Must have at most one of: -iform, -ilen or -category " << "as a pintool option" << endl; exit(1); } if (KnobInstructionLengthMix.Value()) measurement = measure_ilen; if (KnobCategoryMix.Value()) measurement = measure_category; if (KnobIformMix.Value()) { #if defined(TARGET_IA32) || defined(TARGET_IA32E) measurement = measure_iform; #else cerr << "Cannot only compute iform mixes on IA32 and Intel64" << endl; #endif } TRACE_AddInstrumentFunction(Trace, 0); PIN_AddThreadStartFunction(ThreadStart, 0); PIN_AddFiniFunction(Fini, 0); PIN_StartProgram(); // Never returns return 0; #if defined(NEED_TO_PAD) (void) pad0; //pacify compiler (void) pad1; (void) pad2; #endif } /* ===================================================================== */ /* eof */ /* ===================================================================== */