|
GTPin
|
The Imix tool reports the dynamic execution frequency of each kernel instruction (the "instruction mix"), together with opcode histograms per data type.
To run the Imix tool in its default configuration, use this command:
Profilers\GTReplay\intel64\gtreplay.exe -t imix -- path-to-the-directory-containing-the-trace
(Back to the list of all GTReplay Sample Tools)
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2021-2022 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 /******************************************************************************************************* 00008 * IMIX tool 00009 * 00010 * Count instruction mix - dynamic count of each instruction in the trace and 00011 * provide dynamic opcode histograms per data type 00012 * 00013 * NOTE: the tool callbacks might be called from different threads. 00014 */ 00015 #include <stdio.h> 00016 #include <string.h> 00017 #include <vector> 00018 00019 #include "gtreplay_assert.h" 00020 #include "gtreplay_client.h" 00021 #include "knob_parser.h" 00022 #include "knob.h" 00023 00024 Knob<bool> gKnobNoHist("no-hist", false, "Forbids creating per data type dynamic opcode histograms"); 00025 Knob<bool> gKnobShowIp("show-ip", false, "Shows IPs of the instructions"); 00026 00027 // Global variables 00028 uint32_t gMaxNumOfHwThreads = 0; 00029 uint32_t gMaxNumOfTiles = 0; 00030 uint32_t gNumOfInstructions = 0; 00031 uint32_t gNumOfOpcodes = 0; 00032 uint32_t gNumOfDataTypes = 0; 00033 00034 struct PerTileInstCounter { 00035 PerTileInstCounter() 00036 { 00037 count.resize(gMaxNumOfHwThreads); 00038 for (uint32_t i = 0; i < gMaxNumOfHwThreads; i++) 00039 { 00040 count[i].resize(gNumOfInstructions, 0); 00041 } 00042 } 00043 std::vector<std::vector<uint64_t>> count; 00044 }; 00045 00046 std::vector<PerTileInstCounter> icount; 00047 std::vector<uint64_t> totalIcount; 00048 std::vector<std::vector<uint64_t>> opcodes; 00049 uint64_t total_icount = 0; 00050 std::string kernelName; 00051 00052 /* 00053 * BeforeInsCallback - callback called before instruction execution 00054 * 00055 * @params[in] tid - the ID of the GPU HW thread for which the callback is called 00056 * @params[in] ins - a handle to the current instruction 
00057 * @params[in] state - a handle to the HW Thread state corresponding to tid 00058 */ 00059 void BeforeInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*) 00060 { 00061 GTREPLAY_ASSERT(tileId < gMaxNumOfTiles&& tid < gMaxNumOfHwThreads); 00062 00063 // Obtain the instruction ID within the kernel 00064 uint32_t id = GTReplay_InsId(ins); 00065 // Update the instruction counter corresponding to the current HW thread and current instruction 00066 icount[tileId].count[tid][id]++; 00067 } 00068 00069 /* 00070 * AfterInsCallback - callback called after instruction execution 00071 * An illustration - this tool doesn't need to register after instruction callbacks 00072 * 00073 * @params[in] tid - the ID of the GPU HW thread for which the callback is called 00074 * @params[in] ins - a handle to the current instruction 00075 * @params[in] state - a handle to the HW Thread state corresponding to tid 00076 */ 00077 void AfterInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*) 00078 { 00079 } 00080 00081 /* 00082 * OnKernelComplete - callback called upon kernel completion 00083 * 00084 * @params[in] kernel - a handle to the kernel 00085 */ 00086 void OnKernelComplete(GTReplayKernel kernel) 00087 { 00088 // accumulated icounts over HW threads 00089 for (uint32_t i = 0; i < gNumOfInstructions; i++) 00090 { 00091 totalIcount[i] = 0; 00092 00093 for (uint32_t tile = 0; tile < gMaxNumOfTiles; tile++) 00094 { 00095 for (uint32_t t = 0; t < gMaxNumOfHwThreads; t++) 00096 { 00097 totalIcount[i] += icount[tile].count[t][i]; 00098 } 00099 } 00100 00101 total_icount += totalIcount[i]; 00102 } 00103 00104 // Create opcode histograms 00105 for (uint32_t i = 0; i < gNumOfInstructions; i++) 00106 { 00107 GTReplayIns ins = GTReplay_Ins(kernel, i); 00108 uint32_t opcode = GTReplay_Opcode(ins); 00109 uint32_t dataType = GTReplay_DataType(ins); 00110 00111 opcodes[opcode][dataType] += totalIcount[i]; 00112 } 00113 00114 
std::cout << "\n\n=================\n"; 00115 std::cout << "IMIX TOOL\n"; 00116 std::cout << "=================\n\n"; 00117 00118 std::cout << "Kernel: " << kernelName << "\n\n"; 00119 std::cout << "TOTAL ICOUNT = " << total_icount << "\n"; 00120 00121 00122 for (uint32_t i = 0; i < gNumOfInstructions; i++) 00123 { 00124 GTReplayIns ins = GTReplay_Ins(kernel, i); 00125 if (gKnobShowIp) 00126 { 00127 uint32_t ip = GTReplay_GetInsIp(ins); 00128 std::cout << std::hex << ip << ": "; 00129 } 00130 std::cout << "[" << std::dec << std::setw(15) << totalIcount[i] << "] " << std::string(GTReplay_Disasm(ins)) << std::endl; 00131 } 00132 00133 if (!gKnobNoHist) 00134 { 00135 // Print opcode histograms per data type 00136 for (uint32_t t = 0; t < gNumOfDataTypes; t++) 00137 { 00138 bool found = false; 00139 00140 for (uint32_t o = 0; o < gNumOfOpcodes; o++) 00141 { 00142 if (opcodes[o][t]) 00143 { 00144 found = true; 00145 break; 00146 } 00147 } 00148 00149 if (!found) 00150 { 00151 continue; 00152 } 00153 00154 std::cout << "\nDATA TYPE: " << std::string(GTReplay_DataTypeName(t)) << "\n\n"; 00155 00156 for (uint32_t o = 0; o < gNumOfOpcodes; o++) 00157 { 00158 if (opcodes[o][t]) 00159 { 00160 std::cout << std::setw(10) << std::string(GTReplay_OpcodeName(o)) << " " << opcodes[o][t] << "\n"; 00161 } 00162 } 00163 } 00164 } 00165 } 00166 00167 /* 00168 * OnKernelBuild - callback called before kernel execution 00169 * The purpose of this callback is to traverse the kernel binary and instrument callbacks 00170 * 00171 * @params[in] kernel - a handle to the kernel 00172 */ 00173 void OnKernelBuild(GTReplayKernel kernel) 00174 { 00175 uint32_t gModelId = GTReplay_GetModel(kernel); 00176 00177 gMaxNumOfHwThreads = GTReplay_MaxNumOfHWThreads(gModelId); 00178 00179 gMaxNumOfTiles = GTReplay_MaxNumOfTiles(kernel); 00180 GTREPLAY_ASSERT(gMaxNumOfTiles); 00181 00182 gNumOfInstructions = GTReplay_NumOfInstructions(kernel); 00183 00184 // Traverse all the basic blocks 00185 for (GTReplayBbl 
bbl = GTReplay_BblHead(kernel); GTReplay_BblValid(bbl); bbl = GTReplay_BblNext(bbl)) 00186 { 00187 // Traverse all the instruction within the basic blocks 00188 for (GTReplayIns ins = GTReplay_InsHead(bbl); GTReplay_InsValid(ins); ins = GTReplay_InsNext(ins)) 00189 { 00190 // Register callback to be called before instruction execution 00191 GTReplay_RegisterCallbackBeforeIns(kernel, ins, BeforeInsCallback, NULL); 00192 } 00193 } 00194 00195 // Allocate and initialize buffers 00196 icount.resize(gMaxNumOfTiles); 00197 totalIcount.resize(gNumOfInstructions, 0); 00198 00199 gNumOfOpcodes = GTReplay_NumOpcodes(); 00200 gNumOfDataTypes = GTReplay_NumDataTypes(); 00201 00202 opcodes.resize(gNumOfOpcodes); 00203 00204 for (uint32_t i = 0; i < gNumOfOpcodes; i++) 00205 { 00206 opcodes[i].resize(gNumOfDataTypes+1, 0); 00207 } 00208 00209 uint32_t kernelNameSize = 0; 00210 GTReplay_GetKernelName(kernel, &kernelNameSize, nullptr); 00211 00212 char* buf = new char[kernelNameSize + 1](); 00213 GTReplay_GetKernelName(kernel, &kernelNameSize, buf); 00214 00215 kernelName = std::string(buf); 00216 00217 delete[] buf; 00218 } 00219 00220 /* 00221 * GTReplay_Entry - tool entry point 00222 */ 00223 extern "C" 00224 DLLEXP void FASTCALL GTReplay_Entry(int argc, const char *argv[]) 00225 { 00226 // configure GTReplay 00227 ConfigureGTReplay(argc, argv); 00228 00229 // register OnKernelBuild and OnKernelComplete callbacks 00230 GTReplay_RegisterOnKernelBuildCallback(OnKernelBuild); 00231 GTReplay_RegisterOnKernelCompleteCallback(OnKernelComplete); 00232 00233 // Start GTReplay 00234 GTReplay_Start(); 00235 }
(Back to the list of all GTReplay Sample Tools)
Copyright (C) 2013-2025 Intel Corporation
SPDX-License-Identifier: MIT
1.7.4