GTPin
GTReplay: Imix Sample Tool

The Imix tool reports the dynamic execution count of each kernel instruction (the "instruction mix") and, unless disabled with the no-hist knob, dynamic opcode histograms per data type.

Running Imix tool

To run the Imix tool in its default configuration, use this command:

Profilers\GTReplay\intel64\gtreplay.exe -t imix -- path-to-the-directory-containing-the-trace

(Back to the list of all GTReplay Sample Tools)

imix.cpp

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2021-2022 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 /*******************************************************************************************************
00008  * IMIX tool
00009  *
00010  * Count instruction mix - dynamic count of each instruction in the trace and
00011  *                         provide dynamic opcode histograms per data type
00012  *
00013  * NOTE: the tool callbacks might be called from different threads.
00014  */
00015 #include <stdio.h>
00016 #include <string.h>
00017 #include <vector>
00018 
00019 #include "gtreplay_assert.h"
00020 #include "gtreplay_client.h"
00021 #include "knob_parser.h"
00022 #include "knob.h"
00023 
00024 Knob<bool>  gKnobNoHist("no-hist", false, "Forbids creating per data type dynamic opcode histograms");
00025 Knob<bool>  gKnobShowIp("show-ip", false, "Shows IPs of the instructions");
00026 
00027 // Global variables 
00028 uint32_t    gMaxNumOfHwThreads = 0;
00029 uint32_t    gMaxNumOfTiles     = 0;
00030 uint32_t    gNumOfInstructions = 0;
00031 uint32_t    gNumOfOpcodes      = 0;
00032 uint32_t    gNumOfDataTypes    = 0;
00033 
00034 struct PerTileInstCounter {
00035     PerTileInstCounter()
00036     {
00037         count.resize(gMaxNumOfHwThreads);
00038         for (uint32_t i = 0; i < gMaxNumOfHwThreads; i++)
00039         {
00040             count[i].resize(gNumOfInstructions, 0);
00041         }
00042     }
00043     std::vector<std::vector<uint64_t>> count;
00044 };
00045 
00046 std::vector<PerTileInstCounter>    icount;
00047 std::vector<uint64_t>              totalIcount;
00048 std::vector<std::vector<uint64_t>> opcodes;
00049 uint64_t    total_icount = 0;
00050 std::string kernelName;
00051 
00052 /*
00053  * BeforeInsCallback - callback called before instruction execution
00054  *
00055  * @params[in] tid - the ID of the GPU HW thread for which the callback is called
00056  * @params[in] ins - a handle to the current instruction
00057  * @params[in] state - a handle to the HW Thread state corresponding to tid
00058  */
00059 void BeforeInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*)
00060 {
00061     GTREPLAY_ASSERT(tileId < gMaxNumOfTiles&& tid < gMaxNumOfHwThreads);
00062 
00063     // Obtain the instruction ID within the kernel
00064     uint32_t id = GTReplay_InsId(ins);
00065     // Update the instruction counter corresponding to the current HW thread and current instruction
00066     icount[tileId].count[tid][id]++;
00067 }
00068 
00069 /*
00070  * AfterInsCallback - callback called after instruction execution
00071  *                    An illustration - this tool doesn't need to register after instruction callbacks
00072  *
00073  * @params[in] tid - the ID of the GPU HW thread for which the callback is called
00074  * @params[in] ins - a handle to the current instruction
00075  * @params[in] state - a handle to the HW Thread state corresponding to tid
00076  */
00077 void AfterInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*)
00078 {
00079 }
00080 
00081 /*
00082  * OnKernelComplete - callback called upon kernel completion
00083  *
00084  * @params[in] kernel - a handle to the kernel
00085  */
00086 void OnKernelComplete(GTReplayKernel kernel)
00087 {
00088     // accumulated icounts over HW threads 
00089     for (uint32_t i = 0; i < gNumOfInstructions; i++)
00090     {
00091         totalIcount[i] = 0;
00092 
00093         for (uint32_t tile = 0; tile < gMaxNumOfTiles; tile++)
00094         {
00095             for (uint32_t t = 0; t < gMaxNumOfHwThreads; t++)
00096             {
00097                 totalIcount[i] += icount[tile].count[t][i];
00098             }
00099         }
00100 
00101         total_icount += totalIcount[i];
00102     }
00103 
00104     // Create opcode histograms
00105     for (uint32_t i = 0; i < gNumOfInstructions; i++)
00106     {
00107         GTReplayIns ins = GTReplay_Ins(kernel, i);
00108         uint32_t opcode = GTReplay_Opcode(ins);
00109         uint32_t dataType = GTReplay_DataType(ins);
00110 
00111         opcodes[opcode][dataType] += totalIcount[i];
00112     }
00113 
00114     std::cout << "\n\n=================\n";
00115     std::cout << "IMIX TOOL\n";
00116     std::cout << "=================\n\n";
00117 
00118     std::cout << "Kernel: " << kernelName << "\n\n";
00119     std::cout << "TOTAL ICOUNT = " << total_icount << "\n";
00120 
00121    
00122     for (uint32_t i = 0; i < gNumOfInstructions; i++)
00123     {
00124         GTReplayIns ins = GTReplay_Ins(kernel, i);
00125         if (gKnobShowIp)
00126         {
00127             uint32_t ip = GTReplay_GetInsIp(ins);
00128             std::cout << std::hex << ip << ":  ";
00129         }
00130         std::cout << "[" << std::dec << std::setw(15) << totalIcount[i] << "] " << std::string(GTReplay_Disasm(ins)) << std::endl;
00131     }
00132 
00133     if (!gKnobNoHist)
00134     {
00135         // Print opcode histograms per data type  
00136         for (uint32_t t = 0; t < gNumOfDataTypes; t++)
00137         {
00138             bool found = false;
00139 
00140             for (uint32_t o = 0; o < gNumOfOpcodes; o++)
00141             {
00142                 if (opcodes[o][t])
00143                 {
00144                     found = true;
00145                     break;
00146                 }
00147             }
00148         
00149             if (!found)
00150             {
00151                 continue;
00152             }
00153 
00154             std::cout << "\nDATA TYPE: " << std::string(GTReplay_DataTypeName(t)) << "\n\n";
00155         
00156             for (uint32_t o = 0; o < gNumOfOpcodes; o++)
00157             {
00158                 if (opcodes[o][t])
00159                 {
00160                     std::cout << std::setw(10) << std::string(GTReplay_OpcodeName(o)) << "   " << opcodes[o][t] << "\n";
00161                 }
00162             }
00163         }
00164     }
00165 }
00166 
00167 /*
00168  * OnKernelBuild - callback called before kernel execution
00169  *                 The purpose of this callback is to traverse the kernel binary and instrument callbacks
00170  *
00171  * @params[in] kernel - a handle to the kernel
00172  */
00173 void OnKernelBuild(GTReplayKernel kernel)
00174 {
00175     uint32_t gModelId = GTReplay_GetModel(kernel);
00176 
00177     gMaxNumOfHwThreads = GTReplay_MaxNumOfHWThreads(gModelId);
00178 
00179     gMaxNumOfTiles = GTReplay_MaxNumOfTiles(kernel);
00180     GTREPLAY_ASSERT(gMaxNumOfTiles);
00181     
00182     gNumOfInstructions = GTReplay_NumOfInstructions(kernel);
00183 
00184     // Traverse all the basic blocks 
00185     for (GTReplayBbl bbl = GTReplay_BblHead(kernel); GTReplay_BblValid(bbl); bbl = GTReplay_BblNext(bbl))
00186     {
00187         // Traverse all the instruction within the basic blocks 
00188         for (GTReplayIns ins = GTReplay_InsHead(bbl); GTReplay_InsValid(ins); ins = GTReplay_InsNext(ins))
00189         {
00190             // Register callback to be called before instruction execution
00191             GTReplay_RegisterCallbackBeforeIns(kernel, ins, BeforeInsCallback, NULL);
00192         }
00193     }
00194 
00195     // Allocate and initialize buffers
00196     icount.resize(gMaxNumOfTiles);
00197     totalIcount.resize(gNumOfInstructions, 0);
00198 
00199     gNumOfOpcodes = GTReplay_NumOpcodes();
00200     gNumOfDataTypes = GTReplay_NumDataTypes();
00201 
00202     opcodes.resize(gNumOfOpcodes);
00203 
00204     for (uint32_t i = 0; i < gNumOfOpcodes; i++)
00205     {
00206         opcodes[i].resize(gNumOfDataTypes+1, 0);
00207     }
00208 
00209     uint32_t kernelNameSize = 0;
00210     GTReplay_GetKernelName(kernel, &kernelNameSize, nullptr);
00211 
00212     char* buf = new char[kernelNameSize + 1]();
00213     GTReplay_GetKernelName(kernel, &kernelNameSize, buf);
00214 
00215     kernelName = std::string(buf);
00216     
00217     delete[] buf;
00218 }
00219 
00220 /*
00221  * GTReplay_Entry - tool entry point
00222  */
00223 extern "C"
00224 DLLEXP void FASTCALL GTReplay_Entry(int argc, const char *argv[])
00225 {
00226     // configure GTReplay
00227     ConfigureGTReplay(argc, argv);
00228     
00229     // register OnKernelBuild and OnKernelComplete callbacks
00230     GTReplay_RegisterOnKernelBuildCallback(OnKernelBuild);
00231     GTReplay_RegisterOnKernelCompleteCallback(OnKernelComplete);
00232 
00233     // Start GTReplay
00234     GTReplay_Start();
00235 }

(Back to the list of all GTReplay Sample Tools)


 All Data Structures Functions Variables Typedefs Enumerations Enumerator


  Copyright (C) 2013-2025 Intel Corporation
SPDX-License-Identifier: MIT