1 | # VampirTrace metrics specification |
---|
2 | # |
---|
3 | # measurement definitions complement/extend native (and PAPI preset) definitions |
---|
4 | # measured metrics derive from a single measured counter (in principle) |
---|
5 | # all measure definitions are considered equivalent |
---|
6 | # (though some may be platform-specific) |
---|
7 | |
---|
8 | ### generic (PAPI) measurement aliases |
---|
9 | |
---|
10 | ### level 1 cache accesses |
---|
11 | measure L1_ACCESS = PAPI_L1_TCA |
---|
12 | measure L1_I_ACCESS = PAPI_L1_ICA |
---|
13 | measure L1_D_ACCESS = PAPI_L1_DCA |
---|
14 | |
---|
15 | ### level 1 cache reads |
---|
16 | measure L1_READ = PAPI_L1_TCR |
---|
17 | measure L1_I_READ = PAPI_L1_ICR # equivalent to PAPI_L1_ICA |
---|
18 | measure L1_D_READ = PAPI_L1_DCR |
---|
19 | |
---|
20 | ### level 1 cache writes |
---|
21 | measure L1_WRITE = PAPI_L1_TCW |
---|
22 | measure L1_I_WRITE = PAPI_L1_ICW # never defined |
---|
23 | measure L1_D_WRITE = PAPI_L1_DCW |
---|
24 | |
---|
25 | ### level 1 cache hits |
---|
26 | measure L1_HIT = PAPI_L1_TCH |
---|
27 | measure L1_I_HIT = PAPI_L1_ICH |
---|
28 | measure L1_D_HIT = PAPI_L1_DCH |
---|
29 | |
---|
30 | ### level 1 cache misses |
---|
31 | measure L1_MISS = PAPI_L1_TCM |
---|
32 | measure L1_I_MISS = PAPI_L1_ICM |
---|
33 | measure L1_D_MISS = PAPI_L1_DCM |
---|
34 | measure L1_D_READ_MISS = PAPI_L1_LDM |
---|
35 | measure L1_D_WRITE_MISS = PAPI_L1_STM |
---|
36 | |
---|
37 | ### alternate level 1 cache representation |
---|
38 | measure L1_INST = PAPI_L1_ICA |
---|
39 | measure L1_INST_HIT = PAPI_L1_ICH |
---|
40 | measure L1_INST_MISS = PAPI_L1_ICM |
---|
41 | measure L1_LOAD = PAPI_L1_DCR |
---|
42 | measure L1_LOAD_HIT = PAPI_L1_LDH # non-standard |
---|
43 | measure L1_LOAD_MISS = PAPI_L1_LDM |
---|
44 | measure L1_STORE = PAPI_L1_DCW |
---|
45 | measure L1_STORE_HIT = PAPI_L1_STH # non-standard |
---|
46 | measure L1_STORE_MISS = PAPI_L1_STM |
---|
47 | |
---|
48 | ### level 2 cache accesses |
---|
49 | measure L2_ACCESS = PAPI_L2_TCA |
---|
50 | measure L2_I_ACCESS = PAPI_L2_ICA |
---|
51 | measure L2_D_ACCESS = PAPI_L2_DCA |
---|
52 | |
---|
53 | ### level 2 cache reads |
---|
54 | measure L2_READ = PAPI_L2_TCR |
---|
55 | measure L2_I_READ = PAPI_L2_ICR # equivalent to PAPI_L2_ICA |
---|
56 | measure L2_D_READ = PAPI_L2_DCR |
---|
57 | |
---|
58 | ### level 2 cache writes |
---|
59 | measure L2_WRITE = PAPI_L2_TCW |
---|
60 | measure L2_I_WRITE = PAPI_L2_ICW # never defined |
---|
61 | measure L2_D_WRITE = PAPI_L2_DCW |
---|
62 | |
---|
63 | ### level 2 cache hits |
---|
64 | measure L2_HIT = PAPI_L2_TCH |
---|
65 | measure L2_I_HIT = PAPI_L2_ICH |
---|
66 | measure L2_D_HIT = PAPI_L2_DCH |
---|
67 | |
---|
68 | ### level 2 cache misses |
---|
69 | measure L2_MISS = PAPI_L2_TCM |
---|
70 | measure L2_I_MISS = PAPI_L2_ICM |
---|
71 | measure L2_D_MISS = PAPI_L2_DCM |
---|
72 | measure L2_D_READ_MISS = PAPI_L2_LDM |
---|
73 | measure L2_D_WRITE_MISS = PAPI_L2_STM |
---|
74 | |
---|
75 | ### alternate level 2 cache representation |
---|
76 | measure L2_INST = PAPI_L2_ICA |
---|
77 | measure L2_INST_HIT = PAPI_L2_ICH |
---|
78 | measure L2_INST_MISS = PAPI_L2_ICM |
---|
79 | measure L2_LOAD = PAPI_L2_DCR |
---|
80 | measure L2_LOAD_HIT = PAPI_L2_LDH # non-standard |
---|
81 | measure L2_LOAD_MISS = PAPI_L2_LDM |
---|
82 | measure L2_STORE = PAPI_L2_DCW |
---|
83 | measure L2_STORE_HIT = PAPI_L2_STH # non-standard |
---|
84 | measure L2_STORE_MISS = PAPI_L2_STM |
---|
85 | |
---|
86 | ### level 3 cache accesses |
---|
87 | measure L3_ACCESS = PAPI_L3_TCA |
---|
88 | measure L3_I_ACCESS = PAPI_L3_ICA |
---|
89 | measure L3_D_ACCESS = PAPI_L3_DCA |
---|
90 | |
---|
91 | ### level 3 cache reads |
---|
92 | measure L3_READ = PAPI_L3_TCR |
---|
93 | measure L3_I_READ = PAPI_L3_ICR # equivalent to PAPI_L3_ICA |
---|
94 | measure L3_D_READ = PAPI_L3_DCR |
---|
95 | |
---|
96 | ### level 3 cache writes |
---|
97 | measure L3_WRITE = PAPI_L3_TCW |
---|
98 | measure L3_I_WRITE = PAPI_L3_ICW # never defined |
---|
99 | measure L3_D_WRITE = PAPI_L3_DCW |
---|
100 | |
---|
101 | ### level 3 cache hits |
---|
102 | measure L3_HIT = PAPI_L3_TCH |
---|
103 | measure L3_I_HIT = PAPI_L3_ICH |
---|
104 | measure L3_D_HIT = PAPI_L3_DCH |
---|
105 | |
---|
106 | ### level 3 cache misses |
---|
107 | measure L3_MISS = PAPI_L3_TCM |
---|
108 | measure L3_I_MISS = PAPI_L3_ICM |
---|
109 | measure L3_D_MISS = PAPI_L3_DCM |
---|
110 | measure L3_D_READ_MISS = PAPI_L3_LDM |
---|
111 | measure L3_D_WRITE_MISS = PAPI_L3_STM |
---|
112 | |
---|
113 | ### alternate level 3 cache representation |
---|
114 | measure L3_INST = PAPI_L3_ICA |
---|
115 | measure L3_INST_HIT = PAPI_L3_ICH |
---|
116 | measure L3_INST_MISS = PAPI_L3_ICM |
---|
117 | measure L3_LOAD = PAPI_L3_DCR |
---|
118 | measure L3_LOAD_HIT = PAPI_L3_LDH # non-standard |
---|
119 | measure L3_LOAD_MISS = PAPI_L3_LDM |
---|
120 | measure L3_STORE = PAPI_L3_DCW |
---|
121 | measure L3_STORE_HIT = PAPI_L3_STH # non-standard |
---|
122 | measure L3_STORE_MISS = PAPI_L3_STM |
---|
123 | |
---|
124 | ### TLB misses |
---|
125 | measure TLB_MISS = PAPI_TLB_TL |
---|
126 | measure TLB_I_MISS = PAPI_TLB_IM |
---|
127 | measure TLB_D_MISS = PAPI_TLB_DM |
---|
128 | |
---|
129 | ### instructions |
---|
130 | measure INSTRUCTION = PAPI_TOT_INS |
---|
131 | measure INTEGER = PAPI_INT_INS |
---|
132 | measure FLOATING_POINT = PAPI_FP_INS |
---|
133 | measure FP_ADD = PAPI_FAD_INS |
---|
134 | measure FP_MUL = PAPI_FML_INS |
---|
135 | measure FP_FMA = PAPI_FMA_INS |
---|
136 | measure FP_DIV = PAPI_FDV_INS |
---|
137 | measure FP_INV = PAPI_FNV_INS |
---|
138 | measure FP_SQRT = PAPI_FSQ_INS |
---|
139 | measure VECTOR = PAPI_VEC_INS |
---|
140 | measure SYNCH = PAPI_SYC_INS |
---|
141 | measure LOAD_STORE = PAPI_LST_INS |
---|
142 | measure LOAD = PAPI_LD_INS |
---|
143 | measure STORE = PAPI_SR_INS |
---|
144 | measure COND_STORE = PAPI_CSR_TOT |
---|
145 | measure COND_STORE_SUCCESS = PAPI_CSR_SUC |
---|
146 | measure COND_STORE_UNSUCCESS = PAPI_CSR_FAL |
---|
147 | measure BRANCH = PAPI_BR_INS |
---|
148 | measure UNCOND_BRANCH = PAPI_BR_UCN |
---|
149 | measure COND_BRANCH = PAPI_BR_CN |
---|
150 | measure COND_BRANCH_TAKEN = PAPI_BR_TKN |
---|
151 | measure COND_BRANCH_NOTTAKEN = PAPI_BR_NTK |
---|
152 | measure COND_BRANCH_PRED = PAPI_BR_PRC |
---|
153 | measure COND_BRANCH_MISPRED = PAPI_BR_MSP |
---|
154 | |
---|
155 | ### cycles |
---|
156 | measure CYCLES = PAPI_TOT_CYC |
---|
157 | |
---|
158 | ### idle units |
---|
159 | measure INTEGER_UNIT_IDLE = PAPI_FXU_IDL |
---|
160 | measure FLOAT_UNIT_IDLE = PAPI_FPU_IDL |
---|
161 | measure BRANCH_UNIT_IDLE = PAPI_BRU_IDL |
---|
162 | measure LOADSTORE_UNIT_IDLE = PAPI_LSU_IDL |
---|
163 | |
---|
164 | ### stalls |
---|
165 | measure STALL_MEMORY_ACCESS = PAPI_MEM_SCY |
---|
166 | measure STALL_MEMORY_READ = PAPI_MEM_RCY |
---|
167 | measure STALL_MEMORY_WRITE = PAPI_MEM_WCY |
---|
168 | measure STALL_INST_ISSUE = PAPI_STL_ICY |
---|
169 | |
---|
170 | # platform-specific measurement aliases |
---|
171 | # (complement or redefine generic measurement aliases) |
---|
172 | # may need to be keyed to a particular platform if ambiguous |
---|
173 | |
---|
174 | ### POWER4-specific metrics |
---|
175 | measure FP_LOAD = PM_LSU_LDF |
---|
176 | measure FP_STORE = PM_FPU_STF |
---|
177 | measure FP_MISC = PM_FPU_FMOV_FEST |
---|
178 | |
---|
179 | ### UltraSPARC-III/IV-specific metrics |
---|
180 | measure STALL_L1_MISS = Re_DC_miss # /1 |
---|
181 | measure STALL_L2_MISS = Re_EC_miss # /1 |
---|
182 | measure STALL_IC_MISS = Dispatch0_IC_miss # /0 |
---|
183 | measure STALL_STOREQ = Rstall_storeQ # /0 |
---|
184 | measure STALL_IU_USE = Rstall_IU_use # /0 |
---|
185 | measure STALL_FP_USE = Rstall_FP_use # /1 |
---|
186 | measure STALL_PC_MISS = Re_PC_miss # /1 |
---|
187 | measure STALL_RAW_MISS = Re_RAW_miss # /1 |
---|
188 | measure STALL_FPU_BYPASS = Re_FPU_bypass # /1 |
---|
189 | measure STALL_MISPRED = Dispatch0_mispred # /1 |
---|
190 | measure STALL_BR_TARGET = Dispatch0_br_target # /0 |
---|
191 | measure STALL_2ND_BR = Dispatch0_2nd_br # /0 |
---|
192 | measure STALL_L1_MSOVHD = Re_DC_missovhd # /1 |
---|
193 | |
---|
194 | ### groupings of metrics for collective measurement |
---|
195 | |
---|
196 | ### Opteron groupings (max 4 in group, unrestricted) |
---|
197 | aggroup OPTERON_DC1 = DC_ACCESS DC_MISS DC_L2_REFILL_I DC_SYS_REFILL_I |
---|
198 | aggroup OPTERON_DC2 = DC_L2_REFILL_M DC_L2_REFILL_O DC_L2_REFILL_E DC_L2_REFILL_S |
---|
199 | aggroup OPTERON_DC3 = DC_SYS_REFILL_M DC_SYS_REFILL_O DC_SYS_REFILL_E DC_SYS_REFILL_S |
---|
200 | aggroup OPTERON_IC = IC_FETCH IC_MISS IC_L2_REFILL IC_SYS_REFILL |
---|
201 | aggroup OPTERON_TLB = DC_L1_DTLB_MISS_AND_L2_DTLB_MISS DC_L1_DTLB_MISS_AND_L2_DTLB_HIT IC_L1ITLB_MISS_AND_L2ITLB_MISS IC_L1ITLB_MISS_AND_L2ITLB_HIT |
---|
202 | |
---|
203 | aggroup OPTERON_BR = FR_BR FR_BR_MIS FR_BR_TAKEN FR_BR_TAKEN_MIS |
---|
204 | aggroup OPTERON_FP = FP_ADD_PIPE FP_MULT_PIPE FP_ST_PIPE FP_FAST_FLAG |
---|
205 | aggroup OPTERON_FPU = FR_FPU_X87 FR_FPU_MMX_3D FR_FPU_SSE_SSE2_PACKED FR_FPU_SSE_SSE2_SCALAR |
---|
206 | |
---|
207 | aggroup OPTERON_ST1 = IC_FETCH_STALL FR_DECODER_EMPTY FR_DISPATCH_STALLS FR_DISPATCH_STALLS_FULL_FPU |
---|
208 | aggroup OPTERON_ST2 = FR_DISPATCH_STALLS_FULL_LS FR_DISPATCH_STALLS_FULL_REORDER FR_DISPATCH_STALLS_FULL_RESERVATION FR_DISPATCH_STALLS_BR |
---|
209 | aggroup OPTERON_ST3 = FR_DISPATCH_STALLS_FAR FR_DISPATCH_STALLS_SER FR_DISPATCH_STALLS_SEG FR_DISPATCH_STALLS_QUIET |
---|
210 | |
---|
211 | aggroup OPTERON_ETC = FR_X86_INS CPU_CLK_UNHALTED FR_HW_INTS FP_NONE_RET |
---|
212 | aggroup OPTERON_HTM = HT_LL_MEM_XFR HT_LR_MEM_XFR HT_RL_MEM_XFR |
---|
213 | aggroup OPTERON_HTI = HT_LL_IO_XFR HT_LR_IO_XFR HT_RL_IO_XFR |
---|
214 | |
---|
215 | ### POWER4-specific groupings (max 8 in group, restricted) |
---|
216 | aggroup POWER4_DC = PM_DATA_FROM_L2 PM_DATA_FROM_L25_SHR PM_DATA_FROM_L25_MOD PM_DATA_FROM_L275_SHR PM_DATA_FROM_L275_MOD PM_DATA_FROM_L3 PM_DATA_FROM_L35 PM_DATA_FROM_MEM # 5 |
---|
217 | aggroup POWER4_IC = PM_INST_FROM_PREF PM_INST_FROM_L1 PM_INST_FROM_L2 PM_INST_FROM_L25_L275 PM_INST_FROM_L3 PM_INST_FROM_L35 PM_INST_FROM_MEM # 6 |
---|
218 | aggroup POWER4_L1 = PM_LD_REF_L1 PM_LD_MISS_L1 PM_ST_REF_L1 PM_ST_MISS_L1 # 56 |
---|
219 | aggroup POWER4_TLB = PM_ITLB_MISS PM_DTLB_MISS |
---|
220 | aggroup POWER4_LX = PM_ITLB_MISS PM_DTLB_MISS PM_LD_REF_L1 PM_LD_MISS_L1 PM_ST_REF_L1 PM_ST_MISS_L1 # 56 |
---|
221 | aggroup POWER4_BR = PM_BR_ISSUED PM_BR_MPRED_CR PM_BR_MPRED_TA # 3,55,61 |
---|
222 | aggroup POWER4_BRT = PM_BR_ISSUED PM_BR_MPRED_CR PM_BR_MPRED_TA PM_BIQ_IDU_FULL_CYC PM_BRQ_FULL_CYC PM_L1_WRITE_CYC PM_INST_CMPL PM_CYC # 55 |
---|
223 | aggroup POWER4_LSF = PM_FPU_STF PM_LSU_LDF # 15,54,60 |
---|
224 | aggroup POWER4_STL = PM_CYC PM_FPU_FULL_CYC PM_FPU_STALL3 # 54 |
---|
225 | aggroup POWER4_LST = PM_INST_CMPL PM_FPU_STF PM_LSU_LDF PM_CYC PM_FPU_FULL_CYC PM_FPU_STALL3 # 54 |
---|
226 | aggroup POWER4_FP = PM_FPU_FIN PM_FPU_FMA PM_FPU_FDIV PM_FPU_FSQRT PM_FPU_FMOV_FEST # 53 |
---|
227 | aggroup POWER4_IFP = PM_FPU_FIN PM_FPU_FMA PM_FPU_FDIV PM_FPU_FSQRT PM_FPU_FMOV_FEST PM_FXU_FIN # 53 |
---|
228 | aggroup POWER4_II = PM_INST_DISP PM_INST_CMPL # 1,2,18,20 |
---|
229 | |
---|
230 | ### MIPS-R12000 groupings (max 32 in group, unrestricted) |
---|
231 | aggroup R12000_ALL = TLB_misses primary_data_cache_misses secondary_data_cache_misses primary_instruction_cache_misses secondary_instruction_cache_misses graduated_instructions mispredicted_branches graduated_loads graduated_stores graduated_floating-point_instructions decoded_instructions cycles prefetch_primary_data_cache_misses |
---|
232 | aggroup R12000_ALL_PAPI = PAPI_L1_DCM PAPI_L1_ICM PAPI_L2_DCM PAPI_L2_ICM PAPI_TLB_TL PAPI_TOT_INS PAPI_FP_INS PAPI_LD_INS PAPI_SR_INS PAPI_TOT_IIS PAPI_BR_CN PAPI_BR_MSP PAPI_CSR_TOT PAPI_CSR_FAL PAPI_TOT_CYC PAPI_PRF_DM PAPI_CA_INV PAPI_CA_ITV |
---|
233 | |
---|
234 | ### UltraSPARC-III/IV groupings (max 2 in group, restricted) |
---|
235 | aggroup US3_CPI = Cycle_cnt Instr_cnt # duplicates |
---|
236 | |
---|
237 | # cycles/stalls groups |
---|
238 | aggroup US3_SMP = Dispatch_rs_mispred Dispatch0_mispred # stall misprediction |
---|
239 | aggroup US3_SUS = Rstall_IU_use Rstall_FP_use # stall IU/FP use |
---|
240 | aggroup US3_SST = Rstall_storeQ Re_RAW_miss # stall store |
---|
241 | aggroup US3_SCD = Cycle_cnt Re_DC_miss |
---|
242 | aggroup US3_SCO = Dispatch0_br_target Re_DC_missovhd |
---|
243 | aggroup US3_SCE = Dispatch0_2nd_br Re_EC_miss |
---|
244 | aggroup US3_SCP = Dispatch0_IC_miss Re_PC_miss |
---|
245 | aggroup US3_SCX = SI_ciq_flow Re_FPU_bypass # Re_FPU_bypass always zero? |
---|
246 | |
---|
247 | # instruction and TLB groups |
---|
248 | aggroup US3_FPU = FA_pipe_completion FM_pipe_completion |
---|
249 | aggroup US3_BMS = IU_Stat_Br_miss_taken IU_Stat_Br_miss_untaken |
---|
250 | aggroup US3_BCS = IU_Stat_Br_count_taken IU_Stat_Br_count_untaken |
---|
251 | aggroup US3_ITL = Instr_cnt ITLB_miss |
---|
252 | aggroup US3_DTL = Cycle_cnt DTLB_miss |
---|
253 | |
---|
254 | # memory and cache groups |
---|
255 | aggroup US3_ICH = IC_ref IC_miss |
---|
256 | aggroup US3_DCR = DC_rd DC_rd_miss |
---|
257 | aggroup US3_DCW = DC_wr DC_wr_miss |
---|
258 | aggroup US3_ECI = EC_write_hit_RTO EC_ic_miss |
---|
259 | aggroup US3_ECM = EC_rd_miss EC_misses |
---|
260 | |
---|
261 | # locality/SSM and other miscellaneous groups |
---|
262 | aggroup US3_ECL = EC_miss_local EC_miss_remote # only SF15000/SF25000 |
---|
263 | aggroup US3_ECX = EC_wb_remote EC_miss_mtag_remote # only SF15000/SF25000 |
---|
264 | aggroup US3_ECW = EC_ref EC_wb |
---|
265 | aggroup US3_ECS = EC_snoop_inv EC_snoop_cb |
---|
266 | aggroup US3_PCR = PC_port0_rd PC_port1_rd |
---|
267 | aggroup US3_ETC = SI_snoop PC_MS_misses |
---|
268 | aggroup US3_WCM = SI_owned WC_miss |
---|
269 | |
---|
270 | # memory controller groups |
---|
271 | aggroup US3_SM1 = MC_stalls_0 MC_stalls_1 |
---|
272 | aggroup US3_SM2 = MC_stalls_2 MC_stalls_3 |
---|
273 | aggroup US3_MC0 = MC_reads_0 MC_writes_0 |
---|
274 | aggroup US3_MC1 = MC_reads_1 MC_writes_1 |
---|
275 | aggroup US3_MC2 = MC_reads_2 MC_writes_2 |
---|
276 | aggroup US3_MC3 = MC_reads_3 MC_writes_3 |
---|
277 | |
---|
278 | ### Itanium2 groupings (max 4 in group, partially restricted) |
---|
279 | aggroup ITANIUM2_TLB = ITLB_MISSES_FETCH_L1ITLB ITLB_MISSES_FETCH_L2ITLB L2DTLB_MISSES L1DTLB_TRANSFER |
---|
280 | aggroup ITANIUM2_BR = BRANCH_EVENT BR_MISPRED_DETAIL_ALL_CORRECT_PRED BR_MISPRED_DETAIL_ALL_WRONG_PATH BR_MISPRED_DETAIL_ALL_WRONG_TARGET |
---|
281 | aggroup ITANIUM2_STL = DISP_STALLED BACK_END_BUBBLE_ALL BE_EXE_BUBBLE_ALL BE_EXE_BUBBLE_FRALL |
---|
282 | |
---|
283 | aggroup ITANIUM2_L1D = DATA_REFERENCES_SET1 L1D_READS_SET1 L1D_READ_MISSES_ALL L2_DATA_REFERENCES_L2_ALL |
---|
284 | aggroup ITANIUM2_L2D = L2_DATA_REFERENCES_L2_DATA_READS L2_DATA_REFERENCES_L2_DATA_WRITES L3_READS_DATA_READ_ALL L3_WRITES_DATA_WRITE_ALL |
---|
285 | aggroup ITANIUM2_L3D = L3_READS_DATA_READ_HIT L3_READS_DATA_READ_MISS L3_WRITES_DATA_WRITE_HIT L3_WRITES_DATA_WRITE_MISS |
---|
286 | aggroup ITANIUM2_LXD = L2_MISSES L3_REFERENCES L3_READS_ALL_MISS L3_WRITES_ALL_MISS |
---|
287 | aggroup ITANIUM2_LXX = L3_MISSES L3_WRITES_L2_WB_HIT L3_WRITES_L2_WB_MISS |
---|
288 | |
---|
289 | aggroup ITANIUM2_ICD = L1I_READS L2_INST_DEMAND_READS L3_READS_DINST_FETCH_HIT L3_READS_DINST_FETCH_MISS # instruction cache (demand-load only) |
---|
290 | aggroup ITANIUM2_ICP = L1I_PREFETCHES L2_INST_PREFETCHES L3_READS_INST_FETCH_HIT L3_READS_INST_FETCH_MISS # instruction cache (incl. prefetch) |
---|
291 | |
---|
292 | aggroup ITANIUM2_IN1 = INST_DISPERSED IA32_INST_RETIRED IA64_INST_RETIRED LOADS_RETIRED |
---|
293 | aggroup ITANIUM2_IN2 = FP_OPS_RETIRED LOADS_RETIRED CPU_CYCLES ISA_TRANSITIONS |
---|
294 | aggroup ITANIUM2_ISA = IA32_INST_RETIRED IA64_INST_RETIRED IA32_ISA_TRANSITIONS STORES_RETIRED |
---|
295 | aggroup ITANIUM2_FLP = CPU_CYCLES FP_OPS_RETIRED INST_DISPERSED LOADS_RETIRED |
---|
296 | |
---|
297 | ### compositions are derived by combining measurements and create hierarchies |
---|
298 | ### **** generic hierarchy **** |
---|
299 | |
---|
300 | ### cycles (including stalls) |
---|
301 | compose CYCLES = BUSY + STALL + IDLE |
---|
302 | compose STALL = DISPATCH + UNIT_USE + RECIRCULATE |
---|
303 | |
---|
304 | ### instructions |
---|
305 | compose INSTRUCTION = BRANCH + INTEGER + FLOATING_POINT + MEMORY |
---|
306 | compose BRANCH = BRANCH_PRED + BRANCH_MISP |
---|
307 | compose FLOATING_POINT = FP_ADD + FP_MUL + FP_FMA + FP_DIV + FP_INV + FP_SQRT + FP_MISC |
---|
308 | compose MEMORY = LOAD + STORE + SYNCH |
---|
309 | |
---|
310 | ### data accesses (to cache hierarchy & memory) |
---|
311 | compose DATA_ACCESS = DATA_HIT_L1$ + DATA_HIT_L2$ + DATA_HIT_L3$ + DATA_HIT_MEM |
---|
312 | compose DATA_HIT_L1$ = DATA_STORE_INTO_L1$ + DATA_LOAD_FROM_L1$ |
---|
313 | compose DATA_HIT_L2$ = DATA_STORE_INTO_L2$ + DATA_LOAD_FROM_L2$ |
---|
314 | compose DATA_HIT_L3$ = DATA_STORE_INTO_L3$ + DATA_LOAD_FROM_L3$ |
---|
315 | compose DATA_HIT_MEM = DATA_STORE_INTO_MEM + DATA_LOAD_FROM_MEM |
---|
316 | |
---|
317 | ### instruction accesses (to cache hierarchy & memory) |
---|
318 | compose INST_ACCESS = INST_HIT_PREF + INST_HIT_L1$ + INST_HIT_L2$ + INST_HIT_L3$ + INST_HIT_MEM |
---|
319 | |
---|
320 | ### TLB accesses (instruction & data) |
---|
321 | compose TLB_ACCESS = DATA_TLB_ACCESS + INST_TLB_ACCESS |
---|
322 | compose DATA_TLB_ACCESS = DATA_TLB_HIT + DATA_TLB_MISS |
---|
323 | compose INST_TLB_ACCESS = INST_TLB_HIT + INST_TLB_MISS |
---|
324 | |
---|