/* * Copyright 2002-2019 Intel Corporation. * * This software is provided to you as Sample Source Code as defined in the accompanying * End User License Agreement for the Intel(R) Software Development Products ("Agreement") * section 1.L. * * This software and the related documents are provided as is, with no express or implied * warranties, other than those that are expressly stated in the License. */ /*! @file * This file contains a configurable cache class */ #ifndef PIN_CACHE_H #define PIN_CACHE_H #define KILO 1024 #define MEGA (KILO*KILO) #define GIGA (KILO*MEGA) typedef UINT64 CACHE_STATS; // type of cache hit/miss counters #include using std::string; using std::ostringstream; /*! RMR (rodric@gmail.com) * - temporary work around because decstr() * casts 64 bit ints to 32 bit ones */ static string mydecstr(UINT64 v, UINT32 w) { ostringstream o; o.width(w); o << v; string str(o.str()); return str; } /*! * @brief Checks if n is a power of 2. * @returns true if n is power of 2 */ static inline bool IsPower2(UINT32 n) { return ((n & (n - 1)) == 0); } /*! * @brief Computes floor(log2(n)) * Works by finding position of MSB set. * @returns -1 if n == 0. */ static inline INT32 FloorLog2(UINT32 n) { INT32 p = 0; if (n == 0) return -1; if (n & 0xffff0000) { p += 16; n >>= 16; } if (n & 0x0000ff00) { p += 8; n >>= 8; } if (n & 0x000000f0) { p += 4; n >>= 4; } if (n & 0x0000000c) { p += 2; n >>= 2; } if (n & 0x00000002) { p += 1; } return p; } /*! * @brief Computes floor(log2(n)) * Works by finding position of MSB set. * @returns -1 if n == 0. */ static inline INT32 CeilLog2(UINT32 n) { return FloorLog2(n - 1) + 1; } /*! * @brief Cache tag - self clearing on creation */ class CACHE_TAG { private: ADDRINT _tag; public: CACHE_TAG(ADDRINT tag = 0) { _tag = tag; } bool operator==(const CACHE_TAG &right) const { return _tag == right._tag; } operator ADDRINT() const { return _tag; } }; /*! * Everything related to cache sets */ namespace CACHE_SET { /*! * @brief Cache set direct mapped */ class DIRECT_MAPPED { private: CACHE_TAG _tag; public: DIRECT_MAPPED(UINT32 associativity = 1) { ASSERTX(associativity == 1); } VOID SetAssociativity(UINT32 associativity) { ASSERTX(associativity == 1); } UINT32 GetAssociativity(UINT32 associativity) { return 1; } UINT32 Find(CACHE_TAG tag) { return(_tag == tag); } VOID Replace(CACHE_TAG tag) { _tag = tag; } }; /*! * @brief Cache set with round robin replacement */ template class ROUND_ROBIN { private: CACHE_TAG _tags[MAX_ASSOCIATIVITY]; UINT32 _tagsLastIndex; UINT32 _nextReplaceIndex; public: ROUND_ROBIN(UINT32 associativity = MAX_ASSOCIATIVITY) : _tagsLastIndex(associativity - 1) { ASSERTX(associativity <= MAX_ASSOCIATIVITY); _nextReplaceIndex = _tagsLastIndex; for (INT32 index = _tagsLastIndex; index >= 0; index--) { _tags[index] = CACHE_TAG(0); } } VOID SetAssociativity(UINT32 associativity) { ASSERTX(associativity <= MAX_ASSOCIATIVITY); _tagsLastIndex = associativity - 1; _nextReplaceIndex = _tagsLastIndex; } UINT32 GetAssociativity(UINT32 associativity) { return _tagsLastIndex + 1; } UINT32 Find(CACHE_TAG tag) { bool result = true; for (INT32 index = _tagsLastIndex; index >= 0; index--) { // this is an ugly micro-optimization, but it does cause a // tighter assembly loop for ARM that way ... if(_tags[index] == tag) goto end; } result = false; end: return result; } VOID Replace(CACHE_TAG tag) { // g++ -O3 too dumb to do CSE on following lines?! const UINT32 index = _nextReplaceIndex; _tags[index] = tag; // condition typically faster than modulo _nextReplaceIndex = (index == 0 ? _tagsLastIndex : index - 1); } }; } // namespace CACHE_SET namespace CACHE_ALLOC { typedef enum { STORE_ALLOCATE, STORE_NO_ALLOCATE } STORE_ALLOCATION; } /*! * @brief Generic cache base class; no allocate specialization, no cache set specialization */ class CACHE_BASE { public: // types, constants typedef enum { ACCESS_TYPE_LOAD, ACCESS_TYPE_STORE, ACCESS_TYPE_NUM } ACCESS_TYPE; typedef enum { CACHE_TYPE_ICACHE, CACHE_TYPE_DCACHE, CACHE_TYPE_NUM } CACHE_TYPE; protected: static const UINT32 HIT_MISS_NUM = 2; CACHE_STATS _access[ACCESS_TYPE_NUM][HIT_MISS_NUM]; private: // input params const std::string _name; const UINT32 _cacheSize; const UINT32 _lineSize; const UINT32 _associativity; // computed params const UINT32 _lineShift; const UINT32 _setIndexMask; CACHE_STATS SumAccess(bool hit) const { CACHE_STATS sum = 0; for (UINT32 accessType = 0; accessType < ACCESS_TYPE_NUM; accessType++) { sum += _access[accessType][hit]; } return sum; } protected: UINT32 NumSets() const { return _setIndexMask + 1; } public: // constructors/destructors CACHE_BASE(std::string name, UINT32 cacheSize, UINT32 lineSize, UINT32 associativity); // accessors UINT32 CacheSize() const { return _cacheSize; } UINT32 LineSize() const { return _lineSize; } UINT32 Associativity() const { return _associativity; } // CACHE_STATS Hits(ACCESS_TYPE accessType) const { return _access[accessType][true];} CACHE_STATS Misses(ACCESS_TYPE accessType) const { return _access[accessType][false];} CACHE_STATS Accesses(ACCESS_TYPE accessType) const { return Hits(accessType) + Misses(accessType);} CACHE_STATS Hits() const { return SumAccess(true);} CACHE_STATS Misses() const { return SumAccess(false);} CACHE_STATS Accesses() const { return Hits() + Misses();} VOID SplitAddress(const ADDRINT addr, CACHE_TAG & tag, UINT32 & setIndex) const { tag = addr >> _lineShift; setIndex = tag & _setIndexMask; } VOID SplitAddress(const ADDRINT addr, CACHE_TAG & tag, UINT32 & setIndex, UINT32 & lineIndex) const { const UINT32 lineMask = _lineSize - 1; lineIndex = addr & lineMask; SplitAddress(addr, tag, setIndex); } string StatsLong(string prefix = "", CACHE_TYPE = CACHE_TYPE_DCACHE) const; }; CACHE_BASE::CACHE_BASE(std::string name, UINT32 cacheSize, UINT32 lineSize, UINT32 associativity) : _name(name), _cacheSize(cacheSize), _lineSize(lineSize), _associativity(associativity), _lineShift(FloorLog2(lineSize)), _setIndexMask((cacheSize / (associativity * lineSize)) - 1) { ASSERTX(IsPower2(_lineSize)); ASSERTX(IsPower2(_setIndexMask + 1)); for (UINT32 accessType = 0; accessType < ACCESS_TYPE_NUM; accessType++) { _access[accessType][false] = 0; _access[accessType][true] = 0; } } /*! * @brief Stats output method */ string CACHE_BASE::StatsLong(string prefix, CACHE_TYPE cache_type) const { const UINT32 headerWidth = 19; const UINT32 numberWidth = 12; string out; out += prefix + _name + ":" + "\n"; if (cache_type != CACHE_TYPE_ICACHE) { for (UINT32 i = 0; i < ACCESS_TYPE_NUM; i++) { const ACCESS_TYPE accessType = ACCESS_TYPE(i); std::string type(accessType == ACCESS_TYPE_LOAD ? "Load" : "Store"); out += prefix + ljstr(type + "-Hits: ", headerWidth) + mydecstr(Hits(accessType), numberWidth) + " " +fltstr(100.0 * Hits(accessType) / Accesses(accessType), 2, 6) + "%\n"; out += prefix + ljstr(type + "-Misses: ", headerWidth) + mydecstr(Misses(accessType), numberWidth) + " " +fltstr(100.0 * Misses(accessType) / Accesses(accessType), 2, 6) + "%\n"; out += prefix + ljstr(type + "-Accesses: ", headerWidth) + mydecstr(Accesses(accessType), numberWidth) + " " +fltstr(100.0 * Accesses(accessType) / Accesses(accessType), 2, 6) + "%\n"; out += prefix + "\n"; } } out += prefix + ljstr("Total-Hits: ", headerWidth) + mydecstr(Hits(), numberWidth) + " " +fltstr(100.0 * Hits() / Accesses(), 2, 6) + "%\n"; out += prefix + ljstr("Total-Misses: ", headerWidth) + mydecstr(Misses(), numberWidth) + " " +fltstr(100.0 * Misses() / Accesses(), 2, 6) + "%\n"; out += prefix + ljstr("Total-Accesses: ", headerWidth) + mydecstr(Accesses(), numberWidth) + " " +fltstr(100.0 * Accesses() / Accesses(), 2, 6) + "%\n"; out += "\n"; return out; } /*! * @brief Templated cache class with specific cache set allocation policies * * All that remains to be done here is allocate and deallocate the right * type of cache sets. */ template class CACHE : public CACHE_BASE { private: SET _sets[MAX_SETS]; public: // constructors/destructors CACHE(std::string name, UINT32 cacheSize, UINT32 lineSize, UINT32 associativity) : CACHE_BASE(name, cacheSize, lineSize, associativity) { ASSERTX(NumSets() <= MAX_SETS); for (UINT32 i = 0; i < NumSets(); i++) { _sets[i].SetAssociativity(associativity); } } // modifiers /// Cache access from addr to addr+size-1 bool Access(ADDRINT addr, UINT32 size, ACCESS_TYPE accessType); /// Cache access at addr that does not span cache lines bool AccessSingleLine(ADDRINT addr, ACCESS_TYPE accessType); }; /*! * @return true if all accessed cache lines hit */ template bool CACHE::Access(ADDRINT addr, UINT32 size, ACCESS_TYPE accessType) { const ADDRINT highAddr = addr + size; bool allHit = true; const ADDRINT lineSize = LineSize(); const ADDRINT notLineMask = ~(lineSize - 1); do { CACHE_TAG tag; UINT32 setIndex; SplitAddress(addr, tag, setIndex); SET & set = _sets[setIndex]; bool localHit = set.Find(tag); allHit &= localHit; // on miss, loads always allocate, stores optionally if ( (! localHit) && (accessType == ACCESS_TYPE_LOAD || STORE_ALLOCATION == CACHE_ALLOC::STORE_ALLOCATE)) { set.Replace(tag); } addr = (addr & notLineMask) + lineSize; // start of next cache line } while (addr < highAddr); _access[accessType][allHit]++; return allHit; } /*! * @return true if accessed cache line hits */ template bool CACHE::AccessSingleLine(ADDRINT addr, ACCESS_TYPE accessType) { CACHE_TAG tag; UINT32 setIndex; SplitAddress(addr, tag, setIndex); SET & set = _sets[setIndex]; bool hit = set.Find(tag); // on miss, loads always allocate, stores optionally if ( (! hit) && (accessType == ACCESS_TYPE_LOAD || STORE_ALLOCATION == CACHE_ALLOC::STORE_ALLOCATE)) { set.Replace(tag); } _access[accessType][hit]++; return hit; } // define shortcuts #define CACHE_DIRECT_MAPPED(MAX_SETS, ALLOCATION) CACHE #define CACHE_ROUND_ROBIN(MAX_SETS, MAX_ASSOCIATIVITY, ALLOCATION) CACHE, MAX_SETS, ALLOCATION> #endif // PIN_CACHE_H