GTPin
GTReplay: Mem Sample Tool

The Mem tool implements a simple memory model: it follows writes to global memory.

Running the Mem tool

To run the Mem tool in its default configuration, use this command:

Profilers\GTReplay\intel64\gtreplay.exe -t mem -- path-to-the-directory-containing-the-trace

(Back to the list of all GTReplay Sample Tools)

mem.cpp

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2021-2022 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 /*******************************************************************************************************
00008  * MEM tool
00009  *
00010  * Simple memory model - follow writes to global memory.
00011  *
00012  * NOTE: the tool callbacks might be called from different threads.
00013  */
00014 #include <mutex>
00015 #include <stdio.h>
00016 #include <string.h>
00017 
00018 #include "gtreplay_assert.h"
00019 #include "gtreplay_client.h"
00020 #include "knob_parser.h"
00021 #include "mem_model.h"
00022 
00023 // Global variables
00024 uint8_t* sendDecodeData;
00025 
00026 uint32_t   gMaxNumOfHwThreads = 0;
00027 uint32_t   gNumOfInstructions = 0;
00028 
00029 MemoryModel mem;
00030 std::mutex mem_mutex;
00031 uint32_t count = 0;
00032 uint64_t max_addr = 0;
00033 
00034 /*
00035  * BeforeInsCallback - callback called before instruction execution
00036  *
00037  * @params[in] tid - the ID of the GPU HW thread for which the callback is called
00038  * @params[in] ins - a handle to the current instruction
00039  * @params[in] state - a handle to the HW Thread state corresponding to tid
00040  */
00041 void BeforeInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*)
00042 {
00043     // Lock is required since this callback can be called from different threads
00044     std::lock_guard<std::mutex> lock(mem_mutex);
00045 
00046     GTREPLAY_ASSERT(GTReplay_IsMemoryWrite(ins));
00047 
00048     uint32_t id = GTReplay_InsId(ins);
00049 
00050     // Obtain attributes of access for this instruction
00051     uint32_t numOfAccesses = GTReplay_GetNumOfMemoryAccesses(ins); // number of accesses
00052     uint32_t surfaceId     = GTReplay_GetMemorySurfaceId(ins);     // surface ID (BTI)
00053     uint32_t memAccessSize = GTReplay_GetMemoryAccessSize(ins);    // memory access size
00054     
00055     // Obtain the dynamic execution mask - active channels
00056     uint32_t chan = GTReplay_DynamicExecMask(ins,  state);
00057 
00058     // Iterate over all accesses
00059     for (uint32_t i = 0; i < numOfAccesses; i++)
00060     {
00061         uint8_t data[32] = {};
00062 
00063         // If the current channel is active
00064         if (chan & (1 << i))
00065         {
00066             // Obtain the current access address
00067             uint64_t addr = GTReplay_GetMemoryAccessAddr(ins, state, i);
00068             // Obtain the current access value
00069             GTReplay_GetMemoryWriteValue(ins, state, i, data);
00070 
00071             uint32_t* d = (uint32_t*)data;
00072 
00073             // Write to memory in granularity of 4 bytes
00074             for (uint32_t e = 0; e < memAccessSize / 4; e++)
00075             {
00076                 mem.Write(surfaceId, (uint32_t)addr, d[0]);
00077 
00078                 if (addr > max_addr) max_addr = addr;
00079 
00080                 count += 4;
00081                 addr += 4;
00082                 d++;
00083             }
00084         }
00085     }
00086 }
00087 
00088 /*
00089  * AfterInsCallback - callback called after instruction execution
00090  *                    An illustration - this tool doesn't need to register after instruction callbacks
00091  *
00092  * @params[in] tid - the ID of the GPU HW thread for which the callback is called
00093  * @params[in] ins - a handle to the current instruction
00094  * @params[in] state - a handle to the HW Thread state corresponding to tid
00095  */
00096 void AfterInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*)
00097 {
00098 }
00099 
00100 /*
00101  * OnKernelComplete - callback called upon kernel completion
00102  *
00103  * @params[in] kernel - a handle to the kernel
00104  */
00105 void OnKernelComplete(GTReplayKernel kernel)
00106 {
00107     // Print the results
00108     printf("\n\n=================\n"
00109            "MEM TOOL\n"
00110            "=================\n\n");
00111 
00112     printf("written memory = 0x%08x bytes max_addr = 0x%016llx\n", count, max_addr);
00113 
00114     // Print the resulting memory image
00115     mem.Print(true);
00116 }
00117 
00118 /*
00119  * OnKernelBuild - callback called before kernel execution
00120  *                 The purpose of this callback is to traverse the kernel binary and instrument callbacks
00121  *
00122  * @params[in] kernel - a handle to the kernel
00123  */
00124 void OnKernelBuild(GTReplayKernel kernel)
00125 {
00126     uint32_t gModelId = GTReplay_GetModel(kernel);
00127 
00128     gMaxNumOfHwThreads = GTReplay_MaxNumOfHWThreads(gModelId);
00129 
00130     gNumOfInstructions = GTReplay_NumOfInstructions(kernel);
00131     
00132     // Traverse all the basic blocks 
00133     for (GTReplayBbl bbl = GTReplay_BblHead(kernel); GTReplay_BblValid(bbl); bbl = GTReplay_BblNext(bbl))
00134     {
00135         // Traverse all the instruction within the basic blocks 
00136         for (GTReplayIns ins = GTReplay_InsHead(bbl); GTReplay_InsValid(ins); ins = GTReplay_InsNext(ins))
00137         {
00138             // Check whether the instruction is memory write
00139             if (!GTReplay_IsMemoryWrite(ins))
00140             {
00141                 // If not, there is nothing to do
00142                 continue;
00143                }
00144             // Register callback to be called before instruction execution
00145             GTReplay_RegisterCallbackBeforeIns(kernel, ins, BeforeInsCallback, NULL);
00146         }
00147     }
00148 }
00149 
00150 /*
00151  * GTReplay_Entry - tool entry point
00152  */
00153 extern "C"
00154 DLLEXP void FASTCALL GTReplay_Entry(int argc, const char *argv[])
00155 {
00156     // configure GTReplay
00157     ConfigureGTReplay(argc, argv);
00158     
00159     // register OnKernelBuild and OnKernelComplete callbacks
00160     GTReplay_RegisterOnKernelBuildCallback(OnKernelBuild);
00161     GTReplay_RegisterOnKernelCompleteCallback(OnKernelComplete);
00162 
00163     // Start GTReplay
00164     GTReplay_Start();
00165 }

mem_model.h

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2021 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 #ifndef _MEM_MODEL_H
00008 #define _MEM_MODEL_H
00009 
00010 #include <cstring>
00011 #include <mutex>
00012 
00013 
00014 #define FOUR_MB (4 * 1024 * 1024)
00015 #define FOUR_KB (4 * 1024)
00016 
/*
 * Surface - represents a memory buffer
 *
 * The buffer is addressed with 32-bit offsets and stored sparsely: a page directory
 * of 1024 page tables, each page table holding 1024 pointers to 4KB pages. Page
 * tables and pages are allocated on first write and released by the destructor.
 */
class Surface {
public:
    // Constructor
    Surface();

    // Destructor
    ~Surface();

    // A Surface owns its page tables and pages through raw pointers, so a memberwise
    // copy would make two objects free the same memory. Copying is therefore disabled.
    Surface(const Surface&) = delete;
    Surface& operator=(const Surface&) = delete;

    // Writing into the surface
    //
    // addr - an offset into the surface
    // data - data to be written
    void Write(uint32_t addr, uint32_t data);

    // Printing the surface image
    //
    // asFloat - when true, every value is additionally printed as a float
    void Print(bool asFloat);


private:

    // Computes the pointer to the surface corresponding to the given address
    uint8_t * GetPtr(uint32_t addr);

    // PageTable - a table of 1024 entries where each entry points to a 4KB page
    typedef struct PageTableS {
        // Initially each page table is empty
        PageTableS() { memset(pte, 0, sizeof(pte)); }
        uint32_t* pte[1024];
    } PageTable;

    PageTable*   _pageDir[1024]; // Page directory is an array of 1024 pointers to page tables

    uint32_t _numOfWrites;  // number of writes
    uint32_t _maxAddr;      // maximal address
};
00055     
00056 /*
00057  * MemoryModel - a simple memory model
00058  */ 
00059 class MemoryModel {
00060 public:
00061 
00062     // Constructor
00063     MemoryModel();
00064     
00065     // Destructor
00066     ~MemoryModel();
00067 
00068     // Writing into a surface
00069     // 
00070     // bti  - surface ID (BTI index)
00071     // addr - an offset into the surface
00072     // data - data to be written
00073     void Write(uint32_t bti, uint32_t addr, uint32_t data);
00074 
00075     // Printing the memory image
00076     void Print(bool asFloat);
00077 
00078 private:
00079 
00080     Surface*   _mem[256];   // an array of potential 256 surfaces
00081 
00082     std::mutex _mutex;      // mutex
00083 };
00084 
00085 #endif

mem_model.cpp

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2021 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 #include "mem_model.h"
00008 #include <cstdio>
00009 
00010 // MemoryModel constructor
00011 MemoryModel::MemoryModel()
00012 {
00013     // Initially all surface pointers are NULL
00014     for (uint32_t bti = 0; bti < 256; bti++)
00015     {
00016         _mem[bti] = NULL;
00017     }
00018 }
00019     
00020 // MemoryModel destructor
00021 MemoryModel::~MemoryModel()
00022 {
00023     // Iterate over all potential surfaces
00024     for (uint32_t bti = 0; bti < 256; bti++)
00025     {
00026         // If exists
00027         if (_mem[bti])
00028         {
00029             // Delete it
00030             delete _mem[bti];
00031         }
00032     }
00033 }
00034 
00035 // Writing into a surface
00036 // 
00037 // bti  - surface ID (BTI index)
00038 // addr - an offset into the surface
00039 // data - data to be written
00040 void MemoryModel::Write(uint32_t bti, uint32_t addr, uint32_t data)
00041 {
00042     std::lock_guard<std::mutex> lock(_mutex);
00043 
00044     // If first access to the specific surface
00045     if (_mem[bti] == NULL)
00046     {
00047         // Create it
00048         _mem[bti] = new Surface;
00049     }
00050     
00051     // Perform a write into the surface
00052     _mem[bti]->Write(addr, data);
00053 }
00054 
00055 // Printing the memory image
00056 void MemoryModel::Print(bool asFloat)
00057 {
00058     // Iterate over all potential surfaces
00059     for (uint32_t bti = 0; bti < 256; bti++)
00060     {
00061         // If exists
00062         if (_mem[bti])
00063         {
00064             printf("\nSurface: 0x%02x\n=============\n\n", bti);
00065 
00066             // Print the surface
00067             _mem[bti]->Print(asFloat);
00068         }
00069     }
00070 }
00071 
00072 
00073 // Surface constructor
00074 Surface::Surface() : _numOfWrites(0), _maxAddr(0)
00075 {
00076     // Page Directory is initially empty
00077     memset(_pageDir, 0, 1024 * sizeof(PageTable*));
00078 }
00079 
00080 // Surface destructor
00081 Surface::~Surface()
00082 {
00083     // Iterate over all Page Directory entries
00084     for (uint32_t pdi = 0; pdi < 1024; pdi++)
00085     {
00086         PageTable* pageTable = _pageDir[pdi];
00087 
00088         // If current Page Table exists
00089         if (pageTable)
00090         {
00091             // Iterate over all Page Table entries
00092             for (uint32_t pti = 0; pti < 1024; pti++)
00093             {
00094                 uint32_t* page = pageTable->pte[pti];
00095 
00096                 // If page exists
00097                 if (page)
00098                 {
00099                     // Delete it
00100                     delete[] page;
00101                     pageTable->pte[pti] = NULL;
00102                 }
00103             }
00104 
00105             // Delete Page Table
00106             delete pageTable;
00107             _pageDir[pdi] = NULL;
00108         }
00109     }
00110 }
00111 
00112 // Writing into the surface
00113 // 
00114 // addr - an offset into the surface
00115 // data - data to be written
00116 void Surface::Write(uint32_t addr, uint32_t data)
00117 {
00118     // Update statistics
00119     _numOfWrites++;
00120     if (_maxAddr < addr)
00121     {
00122         _maxAddr = addr;
00123     }
00124 
00125     // Get a pointer to required offset
00126     uint32_t* ptr = (uint32_t*)GetPtr(addr);
00127 
00128     // Write the data
00129     ptr[0] = data;
00130 }
00131 
00132 
00133 // Printing the surface image
00134 void Surface::Print(bool asFloat)
00135 {
00136     // Print surface statistics
00137     printf("Surface: numOfWrites = %d maxAddr = 0x%08x\n", _numOfWrites, _maxAddr);
00138 
00139     // Iterate over all entries within the Page Directory
00140     for (uint32_t pdi = 0; pdi < 1024; pdi++)
00141     {
00142         PageTable* pageTable = _pageDir[pdi];
00143 
00144         // If corresponding Page Table exists
00145         if (pageTable)
00146         {
00147             // Compute the base described by the current Page Table
00148             uint32_t pageTableBase = pdi * FOUR_MB;
00149 
00150             // Iterate over all Page Table entries
00151             for (uint32_t pti = 0; pti < 1024; pti++)
00152             {
00153                 uint32_t* page = pageTable->pte[pti];
00154 
00155                 // If the current page exists
00156                 if (page)
00157                 {
00158                     // Compute the base of the current page
00159                     uint32_t pageBase = pageTableBase + pti * FOUR_KB;
00160 
00161                     // Dump all 4bytes of the current page
00162                     for (uint32_t a = 0; a < 1024; a++)
00163                     {
00164                         uint32_t addr = pageBase + a * 4;
00165                         uint32_t val  = page[a];
00166 
00167                         if (asFloat) printf("0x%08x: 0x%08x (%f)\n", addr, val, *(float*)&val);
00168                         else         printf("0x%08x: 0x%08x\n", addr, val);
00169                     }
00170                 }
00171             }
00172         }
00173     }
00174 }
00175 
00176 
00177 // Computes the pointer to the surface corresponding to the given address
00178 uint8_t * Surface::GetPtr(uint32_t addr)
00179 {
00180     // Compute Page Directory index, Page Table index and the offset into the page from the address
00181     uint32_t pdi = addr >> 22;
00182     uint32_t pti = (addr & 0x003FF000) >> 12;
00183     uint32_t offset = addr & 0xFFF;
00184 
00185     // Get corresponding Page Table
00186     PageTable* pageTable = _pageDir[pdi];
00187 
00188     // If doesn't exist
00189     if (pageTable == NULL)
00190     {
00191         // Allocate one
00192         pageTable = new PageTable;
00193 
00194         // Update Page Directory
00195         _pageDir[pdi] = pageTable;
00196     }
00197 
00198     // Compute the current page
00199     uint32_t* page = pageTable->pte[pti];
00200 
00201     // If doesn't exist
00202     if (page == NULL)
00203     {
00204         // Allocate one
00205         page = new uint32_t[1024];
00206         // Initialize with zeros
00207         memset(page, 0, 0x1000);
00208         // Update Page Table
00209         pageTable->pte[pti] = page;
00210     }
00211 
00212     // Compute the pointer into the current page
00213     uint8_t* ptr = (uint8_t*)page + offset;
00214 
00215     return ptr;
00216 }
00217 

(Back to the list of all GTReplay Sample Tools)


 All Data Structures Functions Variables Typedefs Enumerations Enumerator


  Copyright (C) 2013-2025 Intel Corporation
SPDX-License-Identifier: MIT