[97] | 1 | # VampirTrace metrics specification |
---|
| 2 | # |
---|
| 3 | # measurement definitions complement/extend native (and PAPI preset) definitions |
---|
| 4 | # measured metrics derive from a single measured counter (in principle) |
---|
| 5 | # all measure definitions are considered equivalent |
---|
| 6 | # (though some may be platform-specific) |
---|
| 7 | |
---|
| 8 | ### generic (PAPI) measurement aliases |
---|
| 9 | |
---|
| 10 | ### level 1 cache accesses |
---|
| 11 | measure L1_ACCESS = PAPI_L1_TCA |
---|
| 12 | measure L1_I_ACCESS = PAPI_L1_ICA |
---|
| 13 | measure L1_D_ACCESS = PAPI_L1_DCA |
---|
| 14 | |
---|
| 15 | ### level 1 cache reads |
---|
| 16 | measure L1_READ = PAPI_L1_TCR |
---|
| 17 | measure L1_I_READ = PAPI_L1_ICR # equivalent to PAPI_L1_ICA |
---|
| 18 | measure L1_D_READ = PAPI_L1_DCR |
---|
| 19 | |
---|
| 20 | ### level 1 cache writes |
---|
| 21 | measure L1_WRITE = PAPI_L1_TCW |
---|
| 22 | measure L1_I_WRITE = PAPI_L1_ICW # never defined |
---|
| 23 | measure L1_D_WRITE = PAPI_L1_DCW |
---|
| 24 | |
---|
| 25 | ### level 1 cache hits |
---|
| 26 | measure L1_HIT = PAPI_L1_TCH |
---|
| 27 | measure L1_I_HIT = PAPI_L1_ICH |
---|
| 28 | measure L1_D_HIT = PAPI_L1_DCH |
---|
| 29 | |
---|
| 30 | ### level 1 cache misses |
---|
| 31 | measure L1_MISS = PAPI_L1_TCM |
---|
| 32 | measure L1_I_MISS = PAPI_L1_ICM |
---|
| 33 | measure L1_D_MISS = PAPI_L1_DCM |
---|
| 34 | measure L1_D_READ_MISS = PAPI_L1_LDM |
---|
| 35 | measure L1_D_WRITE_MISS = PAPI_L1_STM |
---|
| 36 | |
---|
| 37 | ### alternate level 1 cache representation |
---|
| 38 | measure L1_INST = PAPI_L1_ICA |
---|
| 39 | measure L1_INST_HIT = PAPI_L1_ICH |
---|
| 40 | measure L1_INST_MISS = PAPI_L1_ICM |
---|
| 41 | measure L1_LOAD = PAPI_L1_DCR |
---|
| 42 | measure L1_LOAD_HIT = PAPI_L1_LDH # non-standard |
---|
| 43 | measure L1_LOAD_MISS = PAPI_L1_LDM |
---|
| 44 | measure L1_STORE = PAPI_L1_DCW |
---|
| 45 | measure L1_STORE_HIT = PAPI_L1_STH # non-standard |
---|
| 46 | measure L1_STORE_MISS = PAPI_L1_STM |
---|
| 47 | |
---|
| 48 | ### level 2 cache accesses |
---|
| 49 | measure L2_ACCESS = PAPI_L2_TCA |
---|
| 50 | measure L2_I_ACCESS = PAPI_L2_ICA |
---|
| 51 | measure L2_D_ACCESS = PAPI_L2_DCA |
---|
| 52 | |
---|
| 53 | ### level 2 cache reads |
---|
| 54 | measure L2_READ = PAPI_L2_TCR |
---|
| 55 | measure L2_I_READ = PAPI_L2_ICR # equivalent to PAPI_L2_ICA |
---|
| 56 | measure L2_D_READ = PAPI_L2_DCR |
---|
| 57 | |
---|
| 58 | ### level 2 cache writes |
---|
| 59 | measure L2_WRITE = PAPI_L2_TCW |
---|
| 60 | measure L2_I_WRITE = PAPI_L2_ICW # never defined |
---|
| 61 | measure L2_D_WRITE = PAPI_L2_DCW |
---|
| 62 | |
---|
| 63 | ### level 2 cache hits |
---|
| 64 | measure L2_HIT = PAPI_L2_TCH |
---|
| 65 | measure L2_I_HIT = PAPI_L2_ICH |
---|
| 66 | measure L2_D_HIT = PAPI_L2_DCH |
---|
| 67 | |
---|
| 68 | ### level 2 cache misses |
---|
| 69 | measure L2_MISS = PAPI_L2_TCM |
---|
| 70 | measure L2_I_MISS = PAPI_L2_ICM |
---|
| 71 | measure L2_D_MISS = PAPI_L2_DCM |
---|
| 72 | measure L2_D_READ_MISS = PAPI_L2_LDM |
---|
| 73 | measure L2_D_WRITE_MISS = PAPI_L2_STM |
---|
| 74 | |
---|
| 75 | ### alternate level 2 cache representation |
---|
| 76 | measure L2_INST = PAPI_L2_ICA |
---|
| 77 | measure L2_INST_HIT = PAPI_L2_ICH |
---|
| 78 | measure L2_INST_MISS = PAPI_L2_ICM |
---|
| 79 | measure L2_LOAD = PAPI_L2_DCR |
---|
| 80 | measure L2_LOAD_HIT = PAPI_L2_LDH # non-standard |
---|
| 81 | measure L2_LOAD_MISS = PAPI_L2_LDM |
---|
| 82 | measure L2_STORE = PAPI_L2_DCW |
---|
| 83 | measure L2_STORE_HIT = PAPI_L2_STH # non-standard |
---|
| 84 | measure L2_STORE_MISS = PAPI_L2_STM |
---|
| 85 | |
---|
| 86 | ### level 3 cache accesses |
---|
| 87 | measure L3_ACCESS = PAPI_L3_TCA |
---|
| 88 | measure L3_I_ACCESS = PAPI_L3_ICA |
---|
| 89 | measure L3_D_ACCESS = PAPI_L3_DCA |
---|
| 90 | |
---|
| 91 | ### level 3 cache reads |
---|
| 92 | measure L3_READ = PAPI_L3_TCR |
---|
| 93 | measure L3_I_READ = PAPI_L3_ICR # equivalent to PAPI_L3_ICA |
---|
| 94 | measure L3_D_READ = PAPI_L3_DCR |
---|
| 95 | |
---|
| 96 | ### level 3 cache writes |
---|
| 97 | measure L3_WRITE = PAPI_L3_TCW |
---|
| 98 | measure L3_I_WRITE = PAPI_L3_ICW # never defined |
---|
| 99 | measure L3_D_WRITE = PAPI_L3_DCW |
---|
| 100 | |
---|
| 101 | ### level 3 cache hits |
---|
| 102 | measure L3_HIT = PAPI_L3_TCH |
---|
| 103 | measure L3_I_HIT = PAPI_L3_ICH |
---|
| 104 | measure L3_D_HIT = PAPI_L3_DCH |
---|
| 105 | |
---|
| 106 | ### level 3 cache misses |
---|
| 107 | measure L3_MISS = PAPI_L3_TCM |
---|
| 108 | measure L3_I_MISS = PAPI_L3_ICM |
---|
| 109 | measure L3_D_MISS = PAPI_L3_DCM |
---|
| 110 | measure L3_D_READ_MISS = PAPI_L3_LDM |
---|
| 111 | measure L3_D_WRITE_MISS = PAPI_L3_STM |
---|
| 112 | |
---|
| 113 | ### alternate level 3 cache representation |
---|
| 114 | measure L3_INST = PAPI_L3_ICA |
---|
| 115 | measure L3_INST_HIT = PAPI_L3_ICH |
---|
| 116 | measure L3_INST_MISS = PAPI_L3_ICM |
---|
| 117 | measure L3_LOAD = PAPI_L3_DCR |
---|
| 118 | measure L3_LOAD_HIT = PAPI_L3_LDH # non-standard |
---|
| 119 | measure L3_LOAD_MISS = PAPI_L3_LDM |
---|
| 120 | measure L3_STORE = PAPI_L3_DCW |
---|
| 121 | measure L3_STORE_HIT = PAPI_L3_STH # non-standard |
---|
| 122 | measure L3_STORE_MISS = PAPI_L3_STM |
---|
| 123 | |
---|
| 124 | ### TLB misses |
---|
| 125 | measure TLB_MISS = PAPI_TLB_TL |
---|
| 126 | measure TLB_I_MISS = PAPI_TLB_IM |
---|
| 127 | measure TLB_D_MISS = PAPI_TLB_DM |
---|
| 128 | |
---|
| 129 | ### instructions |
---|
| 130 | measure INSTRUCTION = PAPI_TOT_INS |
---|
| 131 | measure INTEGER = PAPI_INT_INS |
---|
| 132 | measure FLOATING_POINT = PAPI_FP_INS |
---|
| 133 | measure FP_ADD = PAPI_FAD_INS |
---|
| 134 | measure FP_MUL = PAPI_FML_INS |
---|
| 135 | measure FP_FMA = PAPI_FMA_INS |
---|
| 136 | measure FP_DIV = PAPI_FDV_INS |
---|
| 137 | measure FP_INV = PAPI_FNV_INS |
---|
| 138 | measure FP_SQRT = PAPI_FSQ_INS |
---|
| 139 | measure VECTOR = PAPI_VEC_INS |
---|
| 140 | measure SYNCH = PAPI_SYC_INS |
---|
| 141 | measure LOAD_STORE = PAPI_LST_INS |
---|
| 142 | measure LOAD = PAPI_LD_INS |
---|
| 143 | measure STORE = PAPI_SR_INS |
---|
| 144 | measure COND_STORE = PAPI_CSR_TOT |
---|
| 145 | measure COND_STORE_SUCCESS = PAPI_CSR_SUC |
---|
| 146 | measure COND_STORE_UNSUCCESS = PAPI_CSR_FAL |
---|
| 147 | measure BRANCH = PAPI_BR_INS |
---|
| 148 | measure UNCOND_BRANCH = PAPI_BR_UCN |
---|
| 149 | measure COND_BRANCH = PAPI_BR_CN |
---|
| 150 | measure COND_BRANCH_TAKEN = PAPI_BR_TKN |
---|
| 151 | measure COND_BRANCH_NOTTAKEN = PAPI_BR_NTK |
---|
| 152 | measure COND_BRANCH_PRED = PAPI_BR_PRC |
---|
| 153 | measure COND_BRANCH_MISPRED = PAPI_BR_MSP |
---|
| 154 | |
---|
| 155 | ### cycles |
---|
| 156 | measure CYCLES = PAPI_TOT_CYC |
---|
| 157 | |
---|
| 158 | ### idle units |
---|
| 159 | measure INTEGER_UNIT_IDLE = PAPI_FXU_IDL |
---|
| 160 | measure FLOAT_UNIT_IDLE = PAPI_FPU_IDL |
---|
| 161 | measure BRANCH_UNIT_IDLE = PAPI_BRU_IDL |
---|
| 162 | measure LOADSTORE_UNIT_IDLE = PAPI_LSU_IDL |
---|
| 163 | |
---|
| 164 | ### stalls |
---|
| 165 | measure STALL_MEMORY_ACCESS = PAPI_MEM_SCY |
---|
| 166 | measure STALL_MEMORY_READ = PAPI_MEM_RCY |
---|
| 167 | measure STALL_MEMORY_WRITE = PAPI_MEM_WCY |
---|
| 168 | measure STALL_INST_ISSUE = PAPI_STL_ICY |
---|
| 169 | |
---|
| 170 | # platform-specific measurement aliases |
---|
| 171 | # (complement or redefine generic measurement aliases) |
---|
| 172 | # may need to be keyed to a particular platform if there is ambiguity |
---|
| 173 | |
---|
| 174 | ### POWER4-specific metrics |
---|
| 175 | measure FP_LOAD = PM_LSU_LDF |
---|
| 176 | measure FP_STORE = PM_FPU_STF |
---|
| 177 | measure FP_MISC = PM_FPU_FMOV_FEST |
---|
| 178 | |
---|
| 179 | ### UltraSPARC-III/IV-specific metrics |
---|
| 180 | measure STALL_L1_MISS = Re_DC_miss # /1 |
---|
| 181 | measure STALL_L2_MISS = Re_EC_miss # /1 |
---|
| 182 | measure STALL_IC_MISS = Dispatch0_IC_miss # /0 |
---|
| 183 | measure STALL_STOREQ = Rstall_storeQ # /0 |
---|
| 184 | measure STALL_IU_USE = Rstall_IU_use # /0 |
---|
| 185 | measure STALL_FP_USE = Rstall_FP_use # /1 |
---|
| 186 | measure STALL_PC_MISS = Re_PC_miss # /1 |
---|
| 187 | measure STALL_RAW_MISS = Re_RAW_miss # /1 |
---|
| 188 | measure STALL_FPU_BYPASS = Re_FPU_bypass # /1 |
---|
| 189 | measure STALL_MISPRED = Dispatch0_mispred # /1 |
---|
| 190 | measure STALL_BR_TARGET = Dispatch0_br_target # /0 |
---|
| 191 | measure STALL_2ND_BR = Dispatch0_2nd_br # /0 |
---|
| 192 | measure STALL_L1_MSOVHD = Re_DC_missovhd # /1 |
---|
| 193 | |
---|
| 194 | ### groupings of metrics for collective measurement |
---|
| 195 | |
---|
| 196 | ### Opteron groupings (max 4 in group, unrestricted) |
---|
| 197 | aggroup OPTERON_DC1 = DC_ACCESS DC_MISS DC_L2_REFILL_I DC_SYS_REFILL_I |
---|
| 198 | aggroup OPTERON_DC2 = DC_L2_REFILL_M DC_L2_REFILL_O DC_L2_REFILL_E DC_L2_REFILL_S |
---|
| 199 | aggroup OPTERON_DC3 = DC_SYS_REFILL_M DC_SYS_REFILL_O DC_SYS_REFILL_E DC_SYS_REFILL_S |
---|
| 200 | aggroup OPTERON_IC = IC_FETCH IC_MISS IC_L2_REFILL IC_SYS_REFILL |
---|
| 201 | aggroup OPTERON_TLB = DC_L1_DTLB_MISS_AND_L2_DTLB_MISS DC_L1_DTLB_MISS_AND_L2_DTLB_HIT IC_L1ITLB_MISS_AND_L2ITLB_MISS IC_L1ITLB_MISS_AND_L2ITLB_HIT |
---|
| 202 | |
---|
| 203 | aggroup OPTERON_BR = FR_BR FR_BR_MIS FR_BR_TAKEN FR_BR_TAKEN_MIS |
---|
| 204 | aggroup OPTERON_FP = FP_ADD_PIPE FP_MULT_PIPE FP_ST_PIPE FP_FAST_FLAG |
---|
| 205 | aggroup OPTERON_FPU = FR_FPU_X87 FR_FPU_MMX_3D FR_FPU_SSE_SSE2_PACKED FR_FPU_SSE_SSE2_SCALAR |
---|
| 206 | |
---|
| 207 | aggroup OPTERON_ST1 = IC_FETCH_STALL FR_DECODER_EMPTY FR_DISPATCH_STALLS FR_DISPATCH_STALLS_FULL_FPU |
---|
| 208 | aggroup OPTERON_ST2 = FR_DISPATCH_STALLS_FULL_LS FR_DISPATCH_STALLS_FULL_REORDER FR_DISPATCH_STALLS_FULL_RESERVATION FR_DISPATCH_STALLS_BR |
---|
| 209 | aggroup OPTERON_ST3 = FR_DISPATCH_STALLS_FAR FR_DISPATCH_STALLS_SER FR_DISPATCH_STALLS_SEG FR_DISPATCH_STALLS_QUIET |
---|
| 210 | |
---|
| 211 | aggroup OPTERON_ETC = FR_X86_INS CPU_CLK_UNHALTED FR_HW_INTS FP_NONE_RET |
---|
| 212 | aggroup OPTERON_HTM = HT_LL_MEM_XFR HT_LR_MEM_XFR HT_RL_MEM_XFR |
---|
| 213 | aggroup OPTERON_HTI = HT_LL_IO_XFR HT_LR_IO_XFR HT_RL_IO_XFR |
---|
| 214 | |
---|
| 215 | ### POWER4-specific groupings (max 8 in group, restricted) |
---|
| 216 | aggroup POWER4_DC = PM_DATA_FROM_L2 PM_DATA_FROM_L25_SHR PM_DATA_FROM_L25_MOD PM_DATA_FROM_L275_SHR PM_DATA_FROM_L275_MOD PM_DATA_FROM_L3 PM_DATA_FROM_L35 PM_DATA_FROM_MEM # 5 |
---|
| 217 | aggroup POWER4_IC = PM_INST_FROM_PREF PM_INST_FROM_L1 PM_INST_FROM_L2 PM_INST_FROM_L25_L275 PM_INST_FROM_L3 PM_INST_FROM_L35 PM_INST_FROM_MEM # 6 |
---|
| 218 | aggroup POWER4_L1 = PM_LD_REF_L1 PM_LD_MISS_L1 PM_ST_REF_L1 PM_ST_MISS_L1 # 56 |
---|
| 219 | aggroup POWER4_TLB = PM_ITLB_MISS PM_DTLB_MISS |
---|
| 220 | aggroup POWER4_LX = PM_ITLB_MISS PM_DTLB_MISS PM_LD_REF_L1 PM_LD_MISS_L1 PM_ST_REF_L1 PM_ST_MISS_L1 # 56 |
---|
| 221 | aggroup POWER4_BR = PM_BR_ISSUED PM_BR_MPRED_CR PM_BR_MPRED_TA # 3,55,61 |
---|
| 222 | aggroup POWER4_BRT = PM_BR_ISSUED PM_BR_MPRED_CR PM_BR_MPRED_TA PM_BIQ_IDU_FULL_CYC PM_BRQ_FULL_CYC PM_L1_WRITE_CYC PM_INST_CMPL PM_CYC # 55 |
---|
| 223 | aggroup POWER4_LSF = PM_FPU_STF PM_LSU_LDF # 15,54,60 |
---|
| 224 | aggroup POWER4_STL = PM_CYC PM_FPU_FULL_CYC PM_FPU_STALL3 # 54 |
---|
| 225 | aggroup POWER4_LST = PM_INST_CMPL PM_FPU_STF PM_LSU_LDF PM_CYC PM_FPU_FULL_CYC PM_FPU_STALL3 # 54 |
---|
| 226 | aggroup POWER4_FP = PM_FPU_FIN PM_FPU_FMA PM_FPU_FDIV PM_FPU_FSQRT PM_FPU_FMOV_FEST # 53 |
---|
| 227 | aggroup POWER4_IFP = PM_FPU_FIN PM_FPU_FMA PM_FPU_FDIV PM_FPU_FSQRT PM_FPU_FMOV_FEST PM_FXU_FIN # 53 |
---|
| 228 | aggroup POWER4_II = PM_INST_DISP PM_INST_CMPL # 1,2,18,20 |
---|
| 229 | |
---|
| 230 | ### MIPS-R12000 groupings (max 32 in group, unrestricted) |
---|
| 231 | aggroup R12000_ALL = TLB_misses primary_data_cache_misses secondary_data_cache_misses primary_instruction_cache_misses secondary_instruction_cache_misses graduated_instructions mispredicted_branches graduated_loads graduated_stores graduated_floating-point_instructions decoded_instructions cycles prefetch_primary_data_cache_misses |
---|
| 232 | aggroup R12000_ALL_PAPI = PAPI_L1_DCM PAPI_L1_ICM PAPI_L2_DCM PAPI_L2_ICM PAPI_TLB_TL PAPI_TOT_INS PAPI_FP_INS PAPI_LD_INS PAPI_SR_INS PAPI_TOT_IIS PAPI_BR_CN PAPI_BR_MSP PAPI_CSR_TOT PAPI_CSR_FAL PAPI_TOT_CYC PAPI_PRF_DM PAPI_CA_INV PAPI_CA_ITV |
---|
| 233 | |
---|
| 234 | ### UltraSPARC-III/IV groupings (max 2 in group, restricted) |
---|
| 235 | aggroup US3_CPI = Cycle_cnt Instr_cnt # duplicates |
---|
| 236 | |
---|
| 237 | # cycles/stalls groups |
---|
| 238 | aggroup US3_SMP = Dispatch_rs_mispred Dispatch0_mispred # stall misprediction |
---|
| 239 | aggroup US3_SUS = Rstall_IU_use Rstall_FP_use # stall IU/FP use |
---|
| 240 | aggroup US3_SST = Rstall_storeQ Re_RAW_miss # stall store |
---|
| 241 | aggroup US3_SCD = Cycle_cnt Re_DC_miss |
---|
| 242 | aggroup US3_SCO = Dispatch0_br_target Re_DC_missovhd |
---|
| 243 | aggroup US3_SCE = Dispatch0_2nd_br Re_EC_miss |
---|
| 244 | aggroup US3_SCP = Dispatch0_IC_miss Re_PC_miss |
---|
| 245 | aggroup US3_SCX = SI_ciq_flow Re_FPU_bypass # Re_FPU_bypass always zero? |
---|
| 246 | |
---|
| 247 | # instruction and TLB groups |
---|
| 248 | aggroup US3_FPU = FA_pipe_completion FM_pipe_completion |
---|
| 249 | aggroup US3_BMS = IU_Stat_Br_miss_taken IU_Stat_Br_miss_untaken |
---|
| 250 | aggroup US3_BCS = IU_Stat_Br_count_taken IU_Stat_Br_count_untaken |
---|
| 251 | aggroup US3_ITL = Instr_cnt ITLB_miss |
---|
| 252 | aggroup US3_DTL = Cycle_cnt DTLB_miss |
---|
| 253 | |
---|
| 254 | # memory and cache groups |
---|
| 255 | aggroup US3_ICH = IC_ref IC_miss |
---|
| 256 | aggroup US3_DCR = DC_rd DC_rd_miss |
---|
| 257 | aggroup US3_DCW = DC_wr DC_wr_miss |
---|
| 258 | aggroup US3_ECI = EC_write_hit_RTO EC_ic_miss |
---|
| 259 | aggroup US3_ECM = EC_rd_miss EC_misses |
---|
| 260 | |
---|
| 261 | # locality/SSM and other miscellaneous groups |
---|
| 262 | aggroup US3_ECL = EC_miss_local EC_miss_remote # only SF15000/SF25000 |
---|
| 263 | aggroup US3_ECX = EC_wb_remote EC_miss_mtag_remote # only SF15000/SF25000 |
---|
| 264 | aggroup US3_ECW = EC_ref EC_wb |
---|
| 265 | aggroup US3_ECS = EC_snoop_inv EC_snoop_cb |
---|
| 266 | aggroup US3_PCR = PC_port0_rd PC_port1_rd |
---|
| 267 | aggroup US3_ETC = SI_snoop PC_MS_misses |
---|
| 268 | aggroup US3_WCM = SI_owned WC_miss |
---|
| 269 | |
---|
| 270 | # memory controller groups |
---|
| 271 | aggroup US3_SM1 = MC_stalls_0 MC_stalls_1 |
---|
| 272 | aggroup US3_SM2 = MC_stalls_2 MC_stalls_3 |
---|
| 273 | aggroup US3_MC0 = MC_reads_0 MC_writes_0 |
---|
| 274 | aggroup US3_MC1 = MC_reads_1 MC_writes_1 |
---|
| 275 | aggroup US3_MC2 = MC_reads_2 MC_writes_2 |
---|
| 276 | aggroup US3_MC3 = MC_reads_3 MC_writes_3 |
---|
| 277 | |
---|
| 278 | ### Itanium2 groupings (max 4 in group, partially restricted) |
---|
| 279 | aggroup ITANIUM2_TLB = ITLB_MISSES_FETCH_L1ITLB ITLB_MISSES_FETCH_L2ITLB L2DTLB_MISSES L1DTLB_TRANSFER |
---|
| 280 | aggroup ITANIUM2_BR = BRANCH_EVENT BR_MISPRED_DETAIL_ALL_CORRECT_PRED BR_MISPRED_DETAIL_ALL_WRONG_PATH BR_MISPRED_DETAIL_ALL_WRONG_TARGET |
---|
| 281 | aggroup ITANIUM2_STL = DISP_STALLED BACK_END_BUBBLE_ALL BE_EXE_BUBBLE_ALL BE_EXE_BUBBLE_FRALL |
---|
| 282 | |
---|
| 283 | aggroup ITANIUM2_L1D = DATA_REFERENCES_SET1 L1D_READS_SET1 L1D_READ_MISSES_ALL L2_DATA_REFERENCES_L2_ALL |
---|
| 284 | aggroup ITANIUM2_L2D = L2_DATA_REFERENCES_L2_DATA_READS L2_DATA_REFERENCES_L2_DATA_WRITES L3_READS_DATA_READ_ALL L3_WRITES_DATA_WRITE_ALL |
---|
| 285 | aggroup ITANIUM2_L3D = L3_READS_DATA_READ_HIT L3_READS_DATA_READ_MISS L3_WRITES_DATA_WRITE_HIT L3_WRITES_DATA_WRITE_MISS |
---|
| 286 | aggroup ITANIUM2_LXD = L2_MISSES L3_REFERENCES L3_READS_ALL_MISS L3_WRITES_ALL_MISS |
---|
| 287 | aggroup ITANIUM2_LXX = L3_MISSES L3_WRITES_L2_WB_HIT L3_WRITES_L2_WB_MISS |
---|
| 288 | |
---|
| 289 | aggroup ITANIUM2_ICD = L1I_READS L2_INST_DEMAND_READS L3_READS_DINST_FETCH_HIT L3_READS_DINST_FETCH_MISS # instruction cache (demand-load only) |
---|
| 290 | aggroup ITANIUM2_ICP = L1I_PREFETCHES L2_INST_PREFETCHES L3_READS_INST_FETCH_HIT L3_READS_INST_FETCH_MISS # instruction cache (incl. prefetch) |
---|
| 291 | |
---|
| 292 | aggroup ITANIUM2_IN1 = INST_DISPERSED IA32_INST_RETIRED IA64_INST_RETIRED LOADS_RETIRED |
---|
| 293 | aggroup ITANIUM2_IN2 = FP_OPS_RETIRED LOADS_RETIRED CPU_CYCLES ISA_TRANSITIONS |
---|
| 294 | aggroup ITANIUM2_ISA = IA32_INST_RETIRED IA64_INST_RETIRED IA32_ISA_TRANSITIONS STORES_RETIRED |
---|
| 295 | aggroup ITANIUM2_FLP = CPU_CYCLES FP_OPS_RETIRED INST_DISPERSED LOADS_RETIRED |
---|
| 296 | |
---|
| 297 | ### compositions are derived by combining measurements and create hierarchies |
---|
| 298 | ### **** generic hierarchy **** |
---|
| 299 | |
---|
| 300 | ### cycles (including stalls) |
---|
| 301 | compose CYCLES = BUSY + STALL + IDLE |
---|
| 302 | compose STALL = DISPATCH + UNIT_USE + RECIRCULATE |
---|
| 303 | |
---|
| 304 | ### instructions |
---|
| 305 | compose INSTRUCTION = BRANCH + INTEGER + FLOATING_POINT + MEMORY |
---|
| 306 | compose BRANCH = BRANCH_PRED + BRANCH_MISP |
---|
| 307 | compose FLOATING_POINT = FP_ADD + FP_MUL + FP_FMA + FP_DIV + FP_INV + FP_SQRT + FP_MISC |
---|
| 308 | compose MEMORY = LOAD + STORE + SYNCH |
---|
| 309 | |
---|
| 310 | ### data accesses (to cache hierarchy & memory) |
---|
| 311 | compose DATA_ACCESS = DATA_HIT_L1$ + DATA_HIT_L2$ + DATA_HIT_L3$ + DATA_HIT_MEM |
---|
| 312 | compose DATA_HIT_L1$ = DATA_STORE_INTO_L1$ + DATA_LOAD_FROM_L1$ |
---|
| 313 | compose DATA_HIT_L2$ = DATA_STORE_INTO_L2$ + DATA_LOAD_FROM_L2$ |
---|
| 314 | compose DATA_HIT_L3$ = DATA_STORE_INTO_L3$ + DATA_LOAD_FROM_L3$ |
---|
| 315 | compose DATA_HIT_MEM = DATA_STORE_INTO_MEM + DATA_LOAD_FROM_MEM |
---|
| 316 | |
---|
| 317 | ### instruction accesses (to cache hierarchy & memory) |
---|
| 318 | compose INST_ACCESS = INST_HIT_PREF + INST_HIT_L1$ + INST_HIT_L2$ + INST_HIT_L3$ + INST_HIT_MEM |
---|
| 319 | |
---|
| 320 | ### TLB accesses (instruction & data) |
---|
| 321 | compose TLB_ACCESS = DATA_TLB_ACCESS + INST_TLB_ACCESS |
---|
| 322 | compose DATA_TLB_ACCESS = DATA_TLB_HIT + DATA_TLB_MISS |
---|
| 323 | compose INST_TLB_ACCESS = INST_TLB_HIT + INST_TLB_MISS |
---|
| 324 | |
---|