source: proiecte/hpl/openmpi_compiled/share/vampirtrace/METRICS.SPEC @ 97

Last change on this file since 97 was 97, checked in by (none), 14 years ago

Adding compiled files

File size: 13.8 KB
Line 
1# VampirTrace metrics specification
2#
3# measurement definitions complement/extend native (and PAPI preset) definitions
4# measured metrics derive from a single measured counter (in principle)
5# all measure definitions are considered equivalent
6# (though some may be platform-specific)
7
8### generic (PAPI) measurement aliases
9
10### level 1 cache accesses
11measure L1_ACCESS   = PAPI_L1_TCA
12measure L1_I_ACCESS = PAPI_L1_ICA
13measure L1_D_ACCESS = PAPI_L1_DCA
14
15### level 1 cache reads
16measure L1_READ     = PAPI_L1_TCR
17measure L1_I_READ   = PAPI_L1_ICR # equivalent to PAPI_L1_ICA
18measure L1_D_READ   = PAPI_L1_DCR
19
20### level 1 cache writes
21measure L1_WRITE    = PAPI_L1_TCW
22measure L1_I_WRITE  = PAPI_L1_ICW # never defined
23measure L1_D_WRITE  = PAPI_L1_DCW
24
25### level 1 cache hits
26measure L1_HIT      = PAPI_L1_TCH
27measure L1_I_HIT    = PAPI_L1_ICH
28measure L1_D_HIT    = PAPI_L1_DCH
29
30### level 1 cache misses
31measure L1_MISS     = PAPI_L1_TCM
32measure L1_I_MISS   = PAPI_L1_ICM
33measure L1_D_MISS   = PAPI_L1_DCM
34measure L1_D_READ_MISS = PAPI_L1_LDM
35measure L1_D_WRITE_MISS = PAPI_L1_STM
36
37### alternate level 1 cache representation
38measure L1_INST       = PAPI_L1_ICA
39measure L1_INST_HIT   = PAPI_L1_ICH
40measure L1_INST_MISS  = PAPI_L1_ICM
41measure L1_LOAD       = PAPI_L1_DCR
42measure L1_LOAD_HIT   = PAPI_L1_LDH # non-standard
43measure L1_LOAD_MISS  = PAPI_L1_LDM
44measure L1_STORE      = PAPI_L1_DCW
45measure L1_STORE_HIT  = PAPI_L1_STH # non-standard
46measure L1_STORE_MISS = PAPI_L1_STM
47
48### level 2 cache accesses
49measure L2_ACCESS   = PAPI_L2_TCA
50measure L2_I_ACCESS = PAPI_L2_ICA
51measure L2_D_ACCESS = PAPI_L2_DCA
52
53### level 2 cache reads
54measure L2_READ     = PAPI_L2_TCR
55measure L2_I_READ   = PAPI_L2_ICR # equivalent to PAPI_L2_ICA
56measure L2_D_READ   = PAPI_L2_DCR
57
58### level 2 cache writes
59measure L2_WRITE    = PAPI_L2_TCW
60measure L2_I_WRITE  = PAPI_L2_ICW # never defined
61measure L2_D_WRITE  = PAPI_L2_DCW
62
63### level 2 cache hits
64measure L2_HIT      = PAPI_L2_TCH
65measure L2_I_HIT    = PAPI_L2_ICH
66measure L2_D_HIT    = PAPI_L2_DCH
67
68### level 2 cache misses
69measure L2_MISS     = PAPI_L2_TCM
70measure L2_I_MISS   = PAPI_L2_ICM
71measure L2_D_MISS   = PAPI_L2_DCM
72measure L2_D_READ_MISS = PAPI_L2_LDM
73measure L2_D_WRITE_MISS = PAPI_L2_STM
74
75### alternate level 2 cache representation
76measure L2_INST       = PAPI_L2_ICA
77measure L2_INST_HIT   = PAPI_L2_ICH
78measure L2_INST_MISS  = PAPI_L2_ICM
79measure L2_LOAD       = PAPI_L2_DCR
80measure L2_LOAD_HIT   = PAPI_L2_LDH # non-standard
81measure L2_LOAD_MISS  = PAPI_L2_LDM
82measure L2_STORE      = PAPI_L2_DCW
83measure L2_STORE_HIT  = PAPI_L2_STH # non-standard
84measure L2_STORE_MISS = PAPI_L2_STM
85
86### level 3 cache accesses
87measure L3_ACCESS   = PAPI_L3_TCA
88measure L3_I_ACCESS = PAPI_L3_ICA
89measure L3_D_ACCESS = PAPI_L3_DCA
90
91### level 3 cache reads
92measure L3_READ     = PAPI_L3_TCR
93measure L3_I_READ   = PAPI_L3_ICR # equivalent to PAPI_L3_ICA
94measure L3_D_READ   = PAPI_L3_DCR
95
96### level 3 cache writes
97measure L3_WRITE    = PAPI_L3_TCW
98measure L3_I_WRITE  = PAPI_L3_ICW # never defined
99measure L3_D_WRITE  = PAPI_L3_DCW
100
101### level 3 cache hits
102measure L3_HIT      = PAPI_L3_TCH
103measure L3_I_HIT    = PAPI_L3_ICH
104measure L3_D_HIT    = PAPI_L3_DCH
105
106### level 3 cache misses
107measure L3_MISS     = PAPI_L3_TCM
108measure L3_I_MISS   = PAPI_L3_ICM
109measure L3_D_MISS   = PAPI_L3_DCM
110measure L3_D_READ_MISS = PAPI_L3_LDM
111measure L3_D_WRITE_MISS = PAPI_L3_STM
112
113### alternate level 3 cache representation
114measure L3_INST       = PAPI_L3_ICA
115measure L3_INST_HIT   = PAPI_L3_ICH
116measure L3_INST_MISS  = PAPI_L3_ICM
117measure L3_LOAD       = PAPI_L3_DCR
118measure L3_LOAD_HIT   = PAPI_L3_LDH # non-standard
119measure L3_LOAD_MISS  = PAPI_L3_LDM
120measure L3_STORE      = PAPI_L3_DCW
121measure L3_STORE_HIT  = PAPI_L3_STH # non-standard
122measure L3_STORE_MISS = PAPI_L3_STM
123
124### TLB misses
125measure TLB_MISS   = PAPI_TLB_TL
126measure TLB_I_MISS = PAPI_TLB_IM
127measure TLB_D_MISS = PAPI_TLB_DM
128
129### instructions
130measure INSTRUCTION    = PAPI_TOT_INS
131measure INTEGER        = PAPI_INT_INS
132measure FLOATING_POINT = PAPI_FP_INS
133measure FP_ADD         = PAPI_FAD_INS
134measure FP_MUL         = PAPI_FML_INS
135measure FP_FMA         = PAPI_FMA_INS
136measure FP_DIV         = PAPI_FDV_INS
137measure FP_INV         = PAPI_FNV_INS
138measure FP_SQRT        = PAPI_FSQ_INS
139measure VECTOR         = PAPI_VEC_INS
140measure SYNCH          = PAPI_SYC_INS
141measure LOAD_STORE     = PAPI_LST_INS
142measure LOAD           = PAPI_LD_INS
143measure STORE          = PAPI_SR_INS
144measure COND_STORE           = PAPI_CSR_TOT
145measure COND_STORE_SUCCESS   = PAPI_CSR_SUC
146measure COND_STORE_UNSUCCESS = PAPI_CSR_FAL
147measure BRANCH               = PAPI_BR_INS
148measure UNCOND_BRANCH        = PAPI_BR_UCN
149measure COND_BRANCH          = PAPI_BR_CN
150measure COND_BRANCH_TAKEN    = PAPI_BR_TKN
151measure COND_BRANCH_NOTTAKEN = PAPI_BR_NTK
152measure COND_BRANCH_PRED     = PAPI_BR_PRC
153measure COND_BRANCH_MISPRED  = PAPI_BR_MSP
154
155### cycles
156measure CYCLES = PAPI_TOT_CYC
157
158### idle units
159measure INTEGER_UNIT_IDLE   = PAPI_FXU_IDL
160measure FLOAT_UNIT_IDLE     = PAPI_FPU_IDL
161measure BRANCH_UNIT_IDLE    = PAPI_BRU_IDL
162measure LOADSTORE_UNIT_IDLE = PAPI_LSU_IDL
163
164### stalls
165measure STALL_MEMORY_ACCESS = PAPI_MEM_SCY
166measure STALL_MEMORY_READ   = PAPI_MEM_RCY
167measure STALL_MEMORY_WRITE  = PAPI_MEM_WCY
168measure STALL_INST_ISSUE    = PAPI_STL_ICY
169
170# platform-specific measurement aliases
171# (complement or redefine generic measurement aliases)
172# (may need to be keyed to a particular platform if names are ambiguous)
173
174### POWER4-specific metrics
175measure FP_LOAD  = PM_LSU_LDF
176measure FP_STORE = PM_FPU_STF
177measure FP_MISC  = PM_FPU_FMOV_FEST
178
179### UltraSPARC-III/IV-specific metrics
180measure STALL_L1_MISS    = Re_DC_miss # /1
181measure STALL_L2_MISS    = Re_EC_miss # /1
182measure STALL_IC_MISS    = Dispatch0_IC_miss # /0
183measure STALL_STOREQ     = Rstall_storeQ # /0
184measure STALL_IU_USE     = Rstall_IU_use # /0
185measure STALL_FP_USE     = Rstall_FP_use # /1
186measure STALL_PC_MISS    = Re_PC_miss # /1
187measure STALL_RAW_MISS   = Re_RAW_miss # /1
188measure STALL_FPU_BYPASS = Re_FPU_bypass # /1
189measure STALL_MISPRED    = Dispatch0_mispred # /1
190measure STALL_BR_TARGET  = Dispatch0_br_target # /0
191measure STALL_2ND_BR     = Dispatch0_2nd_br # /0
192measure STALL_L1_MSOVHD  = Re_DC_missovhd # /1
193
194### groupings of metrics for collective measurement
195
196### Opteron groupings (max 4 in group, unrestricted)
197aggroup OPTERON_DC1 = DC_ACCESS DC_MISS DC_L2_REFILL_I DC_SYS_REFILL_I
198aggroup OPTERON_DC2 = DC_L2_REFILL_M DC_L2_REFILL_O DC_L2_REFILL_E DC_L2_REFILL_S
199aggroup OPTERON_DC3 = DC_SYS_REFILL_M DC_SYS_REFILL_O DC_SYS_REFILL_E DC_SYS_REFILL_S
200aggroup OPTERON_IC  = IC_FETCH IC_MISS IC_L2_REFILL IC_SYS_REFILL
201aggroup OPTERON_TLB = DC_L1_DTLB_MISS_AND_L2_DTLB_MISS DC_L1_DTLB_MISS_AND_L2_DTLB_HIT IC_L1ITLB_MISS_AND_L2ITLB_MISS IC_L1ITLB_MISS_AND_L2ITLB_HIT
202
203aggroup OPTERON_BR  = FR_BR FR_BR_MIS FR_BR_TAKEN FR_BR_TAKEN_MIS
204aggroup OPTERON_FP  = FP_ADD_PIPE FP_MULT_PIPE FP_ST_PIPE FP_FAST_FLAG
205aggroup OPTERON_FPU = FR_FPU_X87 FR_FPU_MMX_3D FR_FPU_SSE_SSE2_PACKED FR_FPU_SSE_SSE2_SCALAR
206
207aggroup OPTERON_ST1 = IC_FETCH_STALL FR_DECODER_EMPTY FR_DISPATCH_STALLS FR_DISPATCH_STALLS_FULL_FPU
208aggroup OPTERON_ST2 = FR_DISPATCH_STALLS_FULL_LS FR_DISPATCH_STALLS_FULL_REORDER FR_DISPATCH_STALLS_FULL_RESERVATION FR_DISPATCH_STALLS_BR
209aggroup OPTERON_ST3 = FR_DISPATCH_STALLS_FAR FR_DISPATCH_STALLS_SER FR_DISPATCH_STALLS_SEG FR_DISPATCH_STALLS_QUIET
210
211aggroup OPTERON_ETC = FR_X86_INS CPU_CLK_UNHALTED FR_HW_INTS FP_NONE_RET
212aggroup OPTERON_HTM = HT_LL_MEM_XFR HT_LR_MEM_XFR HT_RL_MEM_XFR
213aggroup OPTERON_HTI = HT_LL_IO_XFR HT_LR_IO_XFR HT_RL_IO_XFR
214
215### POWER4-specific groupings (max 8 in group, restricted)
216aggroup POWER4_DC  = PM_DATA_FROM_L2 PM_DATA_FROM_L25_SHR PM_DATA_FROM_L25_MOD PM_DATA_FROM_L275_SHR PM_DATA_FROM_L275_MOD PM_DATA_FROM_L3 PM_DATA_FROM_L35 PM_DATA_FROM_MEM # 5
217aggroup POWER4_IC  = PM_INST_FROM_PREF PM_INST_FROM_L1 PM_INST_FROM_L2 PM_INST_FROM_L25_L275 PM_INST_FROM_L3 PM_INST_FROM_L35 PM_INST_FROM_MEM # 6
218aggroup POWER4_L1  = PM_LD_REF_L1 PM_LD_MISS_L1 PM_ST_REF_L1 PM_ST_MISS_L1 # 56
219aggroup POWER4_TLB = PM_ITLB_MISS PM_DTLB_MISS
220aggroup POWER4_LX  = PM_ITLB_MISS PM_DTLB_MISS PM_LD_REF_L1 PM_LD_MISS_L1 PM_ST_REF_L1 PM_ST_MISS_L1 # 56
221aggroup POWER4_BR  = PM_BR_ISSUED PM_BR_MPRED_CR PM_BR_MPRED_TA # 3,55,61
222aggroup POWER4_BRT = PM_BR_ISSUED PM_BR_MPRED_CR PM_BR_MPRED_TA PM_BIQ_IDU_FULL_CYC PM_BRQ_FULL_CYC PM_L1_WRITE_CYC PM_INST_CMPL PM_CYC # 55
223aggroup POWER4_LSF = PM_FPU_STF PM_LSU_LDF # 15,54,60
224aggroup POWER4_STL = PM_CYC PM_FPU_FULL_CYC PM_FPU_STALL3 # 54
225aggroup POWER4_LST = PM_INST_CMPL PM_FPU_STF PM_LSU_LDF PM_CYC PM_FPU_FULL_CYC PM_FPU_STALL3 # 54
226aggroup POWER4_FP  = PM_FPU_FIN PM_FPU_FMA PM_FPU_FDIV PM_FPU_FSQRT PM_FPU_FMOV_FEST # 53
227aggroup POWER4_IFP = PM_FPU_FIN PM_FPU_FMA PM_FPU_FDIV PM_FPU_FSQRT PM_FPU_FMOV_FEST PM_FXU_FIN # 53
228aggroup POWER4_II  = PM_INST_DISP PM_INST_CMPL # 1,2,18,20
229
230### MIPS-R12000 groupings (max 32 in group, unrestricted)
231aggroup R12000_ALL = TLB_misses primary_data_cache_misses secondary_data_cache_misses primary_instruction_cache_misses secondary_instruction_cache_misses graduated_instructions mispredicted_branches graduated_loads graduated_stores graduated_floating-point_instructions decoded_instructions cycles prefetch_primary_data_cache_misses
232aggroup R12000_ALL_PAPI = PAPI_L1_DCM PAPI_L1_ICM PAPI_L2_DCM PAPI_L2_ICM PAPI_TLB_TL PAPI_TOT_INS PAPI_FP_INS PAPI_LD_INS PAPI_SR_INS PAPI_TOT_IIS PAPI_BR_CN PAPI_BR_MSP PAPI_CSR_TOT PAPI_CSR_FAL PAPI_TOT_CYC PAPI_PRF_DM PAPI_CA_INV PAPI_CA_ITV
233
234### UltraSPARC-III/IV groupings (max 2 in group, restricted)
235aggroup US3_CPI = Cycle_cnt Instr_cnt                   # duplicates
236
237# cycles/stalls groups
238aggroup US3_SMP = Dispatch_rs_mispred Dispatch0_mispred # stall misprediction
239aggroup US3_SUS = Rstall_IU_use Rstall_FP_use           # stall IU/FP use
240aggroup US3_SST = Rstall_storeQ Re_RAW_miss             # stall store
241aggroup US3_SCD = Cycle_cnt Re_DC_miss
242aggroup US3_SCO = Dispatch0_br_target Re_DC_missovhd
243aggroup US3_SCE = Dispatch0_2nd_br Re_EC_miss
244aggroup US3_SCP = Dispatch0_IC_miss Re_PC_miss
245aggroup US3_SCX = SI_ciq_flow Re_FPU_bypass       # Re_FPU_bypass always zero?
246
247# instruction and TLB groups
248aggroup US3_FPU = FA_pipe_completion FM_pipe_completion
249aggroup US3_BMS = IU_Stat_Br_miss_taken IU_Stat_Br_miss_untaken
250aggroup US3_BCS = IU_Stat_Br_count_taken IU_Stat_Br_count_untaken
251aggroup US3_ITL = Instr_cnt ITLB_miss
252aggroup US3_DTL = Cycle_cnt DTLB_miss
253
254# memory and cache groups
255aggroup US3_ICH = IC_ref IC_miss
256aggroup US3_DCR = DC_rd DC_rd_miss
257aggroup US3_DCW = DC_wr DC_wr_miss
258aggroup US3_ECI = EC_write_hit_RTO EC_ic_miss
259aggroup US3_ECM = EC_rd_miss EC_misses
260
261# locality/SSM and other miscellaneous groups
262aggroup US3_ECL = EC_miss_local EC_miss_remote          # only SF15000/SF25000
263aggroup US3_ECX = EC_wb_remote EC_miss_mtag_remote      # only SF15000/SF25000
264aggroup US3_ECW = EC_ref EC_wb
265aggroup US3_ECS = EC_snoop_inv EC_snoop_cb
266aggroup US3_PCR = PC_port0_rd PC_port1_rd
267aggroup US3_ETC = SI_snoop PC_MS_misses
268aggroup US3_WCM = SI_owned WC_miss
269
270# memory controller groups
271aggroup US3_SM1 = MC_stalls_0 MC_stalls_1
272aggroup US3_SM2 = MC_stalls_2 MC_stalls_3
273aggroup US3_MC0 = MC_reads_0 MC_writes_0
274aggroup US3_MC1 = MC_reads_1 MC_writes_1
275aggroup US3_MC2 = MC_reads_2 MC_writes_2
276aggroup US3_MC3 = MC_reads_3 MC_writes_3
277
278### Itanium2 groupings (max 4 in group, partially restricted)
279aggroup ITANIUM2_TLB = ITLB_MISSES_FETCH_L1ITLB ITLB_MISSES_FETCH_L2ITLB L2DTLB_MISSES L1DTLB_TRANSFER
280aggroup ITANIUM2_BR  = BRANCH_EVENT BR_MISPRED_DETAIL_ALL_CORRECT_PRED BR_MISPRED_DETAIL_ALL_WRONG_PATH BR_MISPRED_DETAIL_ALL_WRONG_TARGET
281aggroup ITANIUM2_STL = DISP_STALLED BACK_END_BUBBLE_ALL BE_EXE_BUBBLE_ALL BE_EXE_BUBBLE_FRALL
282
283aggroup ITANIUM2_L1D = DATA_REFERENCES_SET1 L1D_READS_SET1 L1D_READ_MISSES_ALL L2_DATA_REFERENCES_L2_ALL
284aggroup ITANIUM2_L2D = L2_DATA_REFERENCES_L2_DATA_READS L2_DATA_REFERENCES_L2_DATA_WRITES L3_READS_DATA_READ_ALL L3_WRITES_DATA_WRITE_ALL
285aggroup ITANIUM2_L3D = L3_READS_DATA_READ_HIT L3_READS_DATA_READ_MISS L3_WRITES_DATA_WRITE_HIT L3_WRITES_DATA_WRITE_MISS
286aggroup ITANIUM2_LXD = L2_MISSES L3_REFERENCES L3_READS_ALL_MISS L3_WRITES_ALL_MISS
287aggroup ITANIUM2_LXX = L3_MISSES L3_WRITES_L2_WB_HIT L3_WRITES_L2_WB_MISS
288
289aggroup ITANIUM2_ICD = L1I_READS L2_INST_DEMAND_READS L3_READS_DINST_FETCH_HIT L3_READS_DINST_FETCH_MISS # instruction cache (demand-load only)
290aggroup ITANIUM2_ICP = L1I_PREFETCHES L2_INST_PREFETCHES L3_READS_INST_FETCH_HIT L3_READS_INST_FETCH_MISS # instruction cache (incl. prefetch)
291
292aggroup ITANIUM2_IN1 = INST_DISPERSED IA32_INST_RETIRED IA64_INST_RETIRED LOADS_RETIRED
293aggroup ITANIUM2_IN2 = FP_OPS_RETIRED LOADS_RETIRED CPU_CYCLES ISA_TRANSITIONS
294aggroup ITANIUM2_ISA = IA32_INST_RETIRED IA64_INST_RETIRED IA32_ISA_TRANSITIONS STORES_RETIRED
295aggroup ITANIUM2_FLP = CPU_CYCLES FP_OPS_RETIRED INST_DISPERSED LOADS_RETIRED
296
297### compositions are derived by combining measurements and create hierarchies
298### **** generic hierarchy ****
299
300### cycles (including stalls)
301compose CYCLES = BUSY + STALL + IDLE
302compose STALL = DISPATCH + UNIT_USE + RECIRCULATE
303
304### instructions
305compose INSTRUCTION = BRANCH + INTEGER + FLOATING_POINT + MEMORY
306compose BRANCH = BRANCH_PRED + BRANCH_MISP
307compose FLOATING_POINT = FP_ADD + FP_MUL + FP_FMA + FP_DIV + FP_INV + FP_SQRT + FP_MISC
308compose MEMORY = LOAD + STORE + SYNCH
309
310### data accesses (to cache hierarchy & memory)
311compose DATA_ACCESS = DATA_HIT_L1$ + DATA_HIT_L2$ + DATA_HIT_L3$ + DATA_HIT_MEM
312compose DATA_HIT_L1$ = DATA_STORE_INTO_L1$ + DATA_LOAD_FROM_L1$
313compose DATA_HIT_L2$ = DATA_STORE_INTO_L2$ + DATA_LOAD_FROM_L2$
314compose DATA_HIT_L3$ = DATA_STORE_INTO_L3$ + DATA_LOAD_FROM_L3$
315compose DATA_HIT_MEM = DATA_STORE_INTO_MEM + DATA_LOAD_FROM_MEM
316
317### instruction accesses (to cache hierarchy & memory)
318compose INST_ACCESS = INST_HIT_PREF + INST_HIT_L1$ + INST_HIT_L2$ + INST_HIT_L3$ + INST_HIT_MEM
319
320### TLB accesses (instruction & data)
321compose TLB_ACCESS = DATA_TLB_ACCESS + INST_TLB_ACCESS
322compose DATA_TLB_ACCESS = DATA_TLB_HIT + DATA_TLB_MISS
323compose INST_TLB_ACCESS = INST_TLB_HIT + INST_TLB_MISS
324
Note: See TracBrowser for help on using the repository browser.