1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> |
---|
2 | |
---|
3 | <!--Converted with LaTeX2HTML 2002-2-1 (1.71) |
---|
4 | original version by: Nikos Drakos, CBLU, University of Leeds |
---|
5 | * revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan |
---|
6 | * with significant contributions from: |
---|
7 | Jens Lippmann, Marek Rouchal, Martin Wilck and others --> |
---|
8 | <HTML> |
---|
9 | <HEAD> |
---|
10 | <TITLE>UserManual</TITLE> |
---|
11 | <META NAME="description" CONTENT="UserManual"> |
---|
12 | <META NAME="keywords" CONTENT="UserManual"> |
---|
13 | <META NAME="resource-type" CONTENT="document"> |
---|
14 | <META NAME="distribution" CONTENT="global"> |
---|
15 | |
---|
16 | <META NAME="Generator" CONTENT="LaTeX2HTML v2002-2-1"> |
---|
17 | <META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css"> |
---|
18 | |
---|
19 | <STYLE> |
---|
20 | /* Century Schoolbook font is very similar to Computer Modern Math: cmmi */ |
---|
21 | .MATH { font-family: "Century Schoolbook", serif; } |
---|
22 | .MATH I { font-family: "Century Schoolbook", serif; font-style: italic } |
---|
23 | .BOLDMATH { font-family: "Century Schoolbook", serif; font-weight: bold } |
---|
24 | |
---|
25 | /* implement both fixed-size and relative sizes */ |
---|
26 | SMALL.XTINY { font-size : xx-small } |
---|
27 | SMALL.TINY { font-size : x-small } |
---|
28 | SMALL.SCRIPTSIZE { font-size : smaller } |
---|
29 | SMALL.FOOTNOTESIZE { font-size : small } |
---|
30 | SMALL.SMALL { } |
---|
31 | BIG.LARGE { } |
---|
32 | BIG.XLARGE { font-size : large } |
---|
33 | BIG.XXLARGE { font-size : x-large } |
---|
34 | BIG.HUGE { font-size : larger } |
---|
35 | BIG.XHUGE { font-size : xx-large } |
---|
36 | |
---|
37 | /* heading styles */ |
---|
38 | H1 { } |
---|
39 | H2 { } |
---|
40 | H3 { } |
---|
41 | H4 { } |
---|
42 | H5 { } |
---|
43 | |
---|
44 | /* mathematics styles */ |
---|
45 | DIV.displaymath { } /* math displays */ |
---|
46 | TD.eqno { } /* equation-number cells */ |
---|
47 | |
---|
48 | |
---|
49 | /* document-specific styles come next */ |
---|
50 | DIV.navigation { } |
---|
51 | PRE.preform { } |
---|
52 | SPAN.textit { font-style: italic } |
---|
53 | SPAN.arabic { } |
---|
54 | SPAN.textbf { font-weight: bold } |
---|
55 | |
---|
56 | BODY { font-family: sans-serif } |
---|
57 | </STYLE> |
---|
58 | |
---|
59 | </HEAD> |
---|
60 | |
---|
61 | <BODY > |
---|
62 | |
---|
63 | <P> |
---|
64 | |
---|
65 | <P> |
---|
66 | <B><BIG CLASS="XHUGE">VampirTrace 5.4.10 User Manual</BIG></B> |
---|
67 | <BR> |
---|
68 | <BR> |
---|
69 | <BR> |
---|
70 | TU Dresden |
---|
71 | <BR> |
---|
72 | Center for Information Services and |
---|
73 | <BR> |
---|
74 | High Performance Computing (ZIH) |
---|
75 | <BR> |
---|
76 | 01062 Dresden |
---|
77 | <BR> |
---|
78 | Germany |
---|
79 | <BR> |
---|
80 | <BR><TT><A NAME="tex2html1" |
---|
81 | HREF="http://www.tu-dresden.de/zih/">http://www.tu-dresden.de/zih/</A></TT> |
---|
82 | <BR><TT><A NAME="tex2html2" |
---|
83 | HREF="http://www.tu-dresden.de/zih/vampirtrace/">http://www.tu-dresden.de/zih/vampirtrace/</A></TT> |
---|
84 | <BR> |
---|
85 | <BR> |
---|
86 | E-Mail: <TT><A NAME="tex2html3" |
---|
87 | HREF="mailto:vampirsupport@zih.tu-dresden.de">vampirsupport@zih.tu-dresden.de</A></TT> |
---|
88 | <P> |
---|
89 | <BR> |
---|
90 | |
---|
91 | <H2><A NAME="SECTION00100000000000000000"> |
---|
92 | Contents</A> |
---|
93 | </H2> |
---|
94 | <!--Table of Contents--> |
---|
95 | |
---|
96 | <UL CLASS="TofC"> |
---|
97 | <LI><A NAME="tex2html54" |
---|
98 | HREF="UserManual.html#SECTION00200000000000000000">Introduction</A> |
---|
99 | <LI><A NAME="tex2html55" |
---|
100 | HREF="UserManual.html#SECTION00300000000000000000">Instrumentation</A> |
---|
101 | <UL> |
---|
102 | <LI><A NAME="tex2html56" |
---|
103 | HREF="UserManual.html#SECTION00310000000000000000">The Compiler Wrappers</A> |
---|
104 | <LI><A NAME="tex2html57" |
---|
105 | HREF="UserManual.html#SECTION00320000000000000000">Instrumentation Types</A> |
---|
106 | <LI><A NAME="tex2html58" |
---|
107 | HREF="UserManual.html#SECTION00330000000000000000">Automatic Instrumentation</A> |
---|
108 | <UL> |
---|
109 | <LI><A NAME="tex2html59" |
---|
110 | HREF="UserManual.html#SECTION00331000000000000000">Notes for Using the GNU, Intel, or Pathscale Compiler</A> |
---|
111 | <LI><A NAME="tex2html60" |
---|
112 | HREF="UserManual.html#SECTION00332000000000000000">Known License Issues with BFD</A> |
---|
113 | <LI><A NAME="tex2html61" |
---|
114 | HREF="UserManual.html#SECTION00333000000000000000">Notes on Instrumentation of Inline Functions</A> |
---|
115 | </UL> |
---|
116 | <LI><A NAME="tex2html62" |
---|
117 | HREF="UserManual.html#SECTION00340000000000000000">Manual Instrumentation using the VampirTrace API</A> |
---|
118 | <LI><A NAME="tex2html63" |
---|
119 | HREF="UserManual.html#SECTION00350000000000000000">Manual Instrumentation using POMP</A> |
---|
120 | <LI><A NAME="tex2html64" |
---|
121 | HREF="UserManual.html#SECTION00360000000000000000">Binary instrumentation using Dyninst</A> |
---|
122 | </UL> |
---|
123 | <BR> |
---|
124 | <LI><A NAME="tex2html65" |
---|
125 | HREF="UserManual.html#SECTION00400000000000000000">Runtime Measurement</A> |
---|
126 | <UL> |
---|
127 | <LI><A NAME="tex2html66" |
---|
128 | HREF="UserManual.html#SECTION00410000000000000000">Environment Variables</A> |
---|
129 | <LI><A NAME="tex2html67" |
---|
130 | HREF="UserManual.html#SECTION00420000000000000000">Influencing Trace File Size</A> |
---|
131 | <LI><A NAME="tex2html68" |
---|
132 | HREF="UserManual.html#SECTION00430000000000000000">Unification of local Traces</A> |
---|
133 | </UL> |
---|
134 | <BR> |
---|
135 | <LI><A NAME="tex2html69" |
---|
136 | HREF="UserManual.html#SECTION00500000000000000000">Recording additional Events and Counters</A> |
---|
137 | <UL> |
---|
138 | <LI><A NAME="tex2html70" |
---|
139 | HREF="UserManual.html#SECTION00510000000000000000">PAPI Hardware Performance Counters</A> |
---|
140 | <LI><A NAME="tex2html71" |
---|
141 | HREF="UserManual.html#SECTION00520000000000000000">Memory Allocation Counters</A> |
---|
142 | <LI><A NAME="tex2html72" |
---|
143 | HREF="UserManual.html#SECTION00530000000000000000">Application I/O Calls</A> |
---|
144 | <LI><A NAME="tex2html73" |
---|
145 | HREF="UserManual.html#SECTION00540000000000000000">User Defined Counters</A> |
---|
146 | </UL> |
---|
147 | <BR> |
---|
148 | <LI><A NAME="tex2html74" |
---|
149 | HREF="UserManual.html#SECTION00600000000000000000">Filtering & Grouping</A> |
---|
150 | <UL> |
---|
151 | <LI><A NAME="tex2html75" |
---|
152 | HREF="UserManual.html#SECTION00610000000000000000">Function Filtering</A> |
---|
153 | <LI><A NAME="tex2html76" |
---|
154 | HREF="UserManual.html#SECTION00620000000000000000">Function Grouping</A> |
---|
155 | </UL> |
---|
156 | <BR> |
---|
157 | <LI><A NAME="tex2html77" |
---|
158 | HREF="UserManual.html#SECTION00700000000000000000">Command Reference</A> |
---|
159 | <UL> |
---|
160 | <LI><A NAME="tex2html78" |
---|
161 | HREF="UserManual.html#SECTION00710000000000000000">Compiler Wrappers (vtcc,vtcxx,vtf77,vtf90)</A> |
---|
162 | <LI><A NAME="tex2html79" |
---|
163 | HREF="UserManual.html#SECTION00720000000000000000">Local Trace Unifier (vtunify)</A> |
---|
164 | <LI><A NAME="tex2html80" |
---|
165 | HREF="UserManual.html#SECTION00730000000000000000">Dyninst Mutator (vtdyn)</A> |
---|
166 | <LI><A NAME="tex2html81" |
---|
167 | HREF="UserManual.html#SECTION00740000000000000000">Trace Filter Tool (vtfilter)</A> |
---|
168 | </UL> |
---|
169 | <BR> |
---|
170 | <LI><A NAME="tex2html82" |
---|
171 | HREF="UserManual.html#SECTION00800000000000000000">PAPI Counter Specifications</A> |
---|
172 | <LI><A NAME="tex2html83" |
---|
173 | HREF="UserManual.html#SECTION00900000000000000000">VampirTrace Installation</A> |
---|
174 | <UL> |
---|
175 | <LI><A NAME="tex2html84" |
---|
176 | HREF="UserManual.html#SECTION00910000000000000000">Basics</A> |
---|
177 | <LI><A NAME="tex2html85" |
---|
178 | HREF="UserManual.html#SECTION00920000000000000000">Configure Options</A> |
---|
179 | <LI><A NAME="tex2html86" |
---|
180 | HREF="UserManual.html#SECTION00930000000000000000">Cross Compilation</A> |
---|
181 | <LI><A NAME="tex2html87" |
---|
182 | HREF="UserManual.html#SECTION00940000000000000000">Environment Set-Up</A> |
---|
183 | <LI><A NAME="tex2html88" |
---|
184 | HREF="UserManual.html#SECTION00950000000000000000">Notes for Developers</A> |
---|
185 | </UL></UL> |
---|
186 | <!--End of Table of Contents--> |
---|
187 | <P> |
---|
188 | This documentation describes how to prepare application programs so that |
---|
189 | traces are generated when they are executed. This step is called <SPAN CLASS="textit">instrumentation</SPAN>. |
---|
190 | Furthermore, it explains how to control the run-time measurement system |
---|
191 | during execution (<SPAN CLASS="textit">tracing</SPAN>). |
---|
192 | This also includes hardware performance counter sampling, |
---|
193 | as well as selective filtering and grouping of functions. |
---|
194 | |
---|
195 | <P> |
---|
196 | |
---|
197 | <P> |
---|
198 | |
---|
199 | <H1><A NAME="SECTION00200000000000000000"> |
---|
200 | Introduction</A> |
---|
201 | </H1> |
---|
202 | |
---|
203 | <P> |
---|
204 | VampirTrace consists of a tool-set and a run-time library for instrumentation |
---|
205 | and tracing of software applications. It is particularly tailored towards |
---|
206 | parallel and distributed High Performance Computing (HPC) applications. |
---|
207 | |
---|
208 | <P> |
---|
209 | The instrumentation part modifies a given application in order to inject |
---|
210 | additional measurement calls during run-time. The tracing part provides |
---|
211 | the current measurement functionality used by the instrumentation calls. |
---|
212 | By this means, a variety of detailed performance properties can be collected |
---|
213 | and recorded during run-time. |
---|
214 | This includes |
---|
215 | |
---|
216 | <P> |
---|
217 | |
---|
218 | <UL> |
---|
219 | <LI>Function call enter and leave events |
---|
220 | </LI> |
---|
221 | <LI>MPI communication events |
---|
222 | </LI> |
---|
223 | <LI>OpenMP events |
---|
224 | </LI> |
---|
225 | <LI>Hardware performance counters |
---|
226 | </LI> |
---|
227 | <LI>various special purpose events |
---|
228 | </LI> |
---|
229 | </UL> |
---|
230 | |
---|
231 | <P> |
---|
232 | After a successful trace run, VampirTrace writes all collected data to a |
---|
233 | trace in the Open Trace Format (OTF), see <TT><A NAME="tex2html4" |
---|
234 | HREF="http://www.tu-dresden.de/zih/otf">http://www.tu-dresden.de/zih/otf</A></TT>. |
---|
235 | |
---|
236 | <P> |
---|
237 | As a result the information is available for post-mortem analysis and |
---|
238 | visualization by various tools. |
---|
239 | Most notably, VampirTrace provides the input data for the Vampir analysis |
---|
240 | and visualization tool, see <TT><A NAME="tex2html5" |
---|
241 | HREF="http://www.vampir.eu">http://www.vampir.eu</A></TT>. |
---|
242 | |
---|
243 | <P> |
---|
244 | VampirTrace is included in OpenMPI 1.3 and later. |
---|
245 | If not disabled explicitly, VampirTrace is built automatically when installing OpenMPI. |
---|
246 | Refer to <TT><A NAME="tex2html6" |
---|
247 | HREF="http://www.open-mpi.org/faq/?category=vampirtrace">http://www.open-mpi.org/faq/?category=vampirtrace</A></TT> for more information. |
---|
248 | |
---|
249 | <P> |
---|
250 | Trace files can quickly become very large. With automatic instrumentation, |
---|
251 | even tracing applications that run only for a few seconds can result in |
---|
252 | trace files of several hundred megabytes. To protect users from |
---|
253 | creating trace files of several gigabytes, the default behavior of |
---|
254 | VampirTrace limits the internal buffer to 32 MB. This produces trace |
---|
255 | files that are not larger than 32 MB per process, typically a lot smaller. |
---|
256 | Please read Section <A HREF="#trace_file_size">3.2</A> on how to remove or change the limit. |
---|
257 | |
---|
258 | <P> |
---|
259 | VampirTrace supports various Unix and Linux platforms common in |
---|
260 | HPC nowadays. It comes as open source software under a BSD License. |
---|
261 | |
---|
262 | <P> |
---|
263 | |
---|
264 | <H1><A NAME="SECTION00300000000000000000"> |
---|
265 | Instrumentation</A> |
---|
266 | </H1> |
---|
267 | |
---|
268 | <P> |
---|
269 | To make measurements with VampirTrace, the user's application program needs to |
---|
270 | be instrumented, i.e., at specific important points (called "events") VampirTrace |
---|
271 | measurement calls have to be activated. |
---|
272 | For example, common events are entering and leaving |
---|
273 | functions, as well as sending and receiving MPI messages. |
---|
274 | |
---|
275 | <P> |
---|
276 | By default, VampirTrace handles this automatically. In order to enable |
---|
277 | instrumentation of function calls, the user only needs to replace the compiler |
---|
278 | and linker commands with VampirTrace's wrappers, |
---|
279 | see Section <A HREF="#compiler_wrappers">2.1</A> below. |
---|
280 | VampirTrace supports different ways of instrumentation as described in |
---|
281 | Section <A HREF="#instrumentation_types">2.2</A>. |
---|
282 | |
---|
283 | <P> |
---|
284 | |
---|
285 | <H1><A NAME="SECTION00310000000000000000"> </A> |
---|
286 | <A NAME="compiler_wrappers"> </A> |
---|
287 | <BR> |
---|
288 | The Compiler Wrappers |
---|
289 | </H1> |
---|
290 | |
---|
291 | <P> |
---|
292 | All the necessary instrumentation of user functions as well as MPI and |
---|
293 | OpenMP events is handled by VampirTrace's compiler wrappers (vtcc, vtcxx, vtf77, and |
---|
294 | vtf90). |
---|
295 | In the script used to build the application (e.g. a makefile), all compile |
---|
296 | and link commands should be replaced by the VampirTrace compiler wrapper. |
---|
297 | The wrappers perform the necessary instrumentation of the program and link |
---|
298 | the suitable VampirTrace library. |
---|
299 | Note that the VampirTrace version included in OpenMPI 1.3 |
---|
300 | has additional wrappers (mpicc-vt, mpicxx-vt, mpif77-vt, and mpif90-vt) which |
---|
301 | are like the ordinary MPI compiler wrappers (mpicc and friends) with the extension |
---|
302 | of automatic instrumentation. |
---|
303 | |
---|
304 | <P> |
---|
305 | The following list shows some examples depending on the parallelization |
---|
306 | type of the program: |
---|
307 | |
---|
308 | <P> |
---|
309 | |
---|
310 | <UL> |
---|
311 | <LI><SPAN CLASS="textbf">Serial programs</SPAN>: |
---|
312 | Compiling serial code is the default behavior of the wrappers. |
---|
313 | Simply replace the compiler by VampirTrace's wrapper: |
---|
314 | |
---|
315 | <P> |
---|
316 | <BR> |
---|
317 | <TABLE CELLPADDING=3> |
---|
318 | <TR><TD ALIGN="LEFT">original:</TD> |
---|
319 | <TD ALIGN="LEFT"><TT>gfortran a.f90 b.f90 -o myprog </TT></TD> |
---|
320 | </TR> |
---|
321 | <TR><TD ALIGN="LEFT">with instrumentation:</TD> |
---|
322 | <TD ALIGN="LEFT"><TT>vtf90 a.f90 b.f90 -o myprog </TT></TD> |
---|
323 | </TR> |
---|
324 | </TABLE> |
---|
325 | <BR> |
---|
326 | |
---|
327 | <P> |
---|
328 | This will instrument user functions (if supported by compiler) and |
---|
329 | link the VampirTrace library. |
---|
330 | |
---|
331 | <P> |
---|
332 | </LI> |
---|
333 | <LI><SPAN CLASS="textbf">MPI parallel programs</SPAN>: |
---|
334 | MPI instrumentation is always handled by means of the PMPI interface |
---|
335 | which is part of the MPI standard. |
---|
336 | This requires the compiler wrapper to link with an MPI-aware version of |
---|
337 | the VampirTrace library. |
---|
338 | If your MPI implementation uses MPI compilers (e.g. mpicc, mpxlf90), |
---|
339 | you need to tell VampirTrace's wrapper to use this compiler |
---|
340 | instead of the serial one: |
---|
341 | |
---|
342 | <P> |
---|
343 | <BR> |
---|
344 | <TABLE CELLPADDING=3> |
---|
345 | <TR><TD ALIGN="LEFT">original:</TD> |
---|
346 | <TD ALIGN="LEFT"><TT>mpicc hello.c -o hello </TT></TD> |
---|
347 | </TR> |
---|
348 | <TR><TD ALIGN="LEFT">with instrumentation:</TD> |
---|
349 | <TD ALIGN="LEFT"><TT>vtcc -vt:cc mpicc hello.c -o hello </TT></TD> |
---|
350 | </TR> |
---|
351 | </TABLE> |
---|
352 | <BR> |
---|
353 | |
---|
354 | <P> |
---|
355 | MPI implementations without their own compilers require the user to link |
---|
356 | the MPI library manually. In this case, you simply replace the compiler |
---|
357 | by VampirTrace's compiler wrapper: |
---|
358 | |
---|
359 | <P> |
---|
360 | <BR> |
---|
361 | <TABLE CELLPADDING=3> |
---|
362 | <TR><TD ALIGN="LEFT">original:</TD> |
---|
363 | <TD ALIGN="LEFT"><TT>icc hello.c -o hello -lmpi </TT></TD> |
---|
364 | </TR> |
---|
365 | <TR><TD ALIGN="LEFT">with instrumentation:</TD> |
---|
366 | <TD ALIGN="LEFT"><TT>vtcc hello.c -o hello -lmpi </TT></TD> |
---|
367 | </TR> |
---|
368 | </TABLE> |
---|
369 | <BR> |
---|
370 | |
---|
371 | <P> |
---|
372 | If you want to instrument MPI events only (creates smaller trace files and less overhead) |
---|
373 | use the option <TT>-vt:inst manual</TT> to disable automatic instrumentation |
---|
374 | of user functions (see also Section <A HREF="#A1">2.4</A>). |
---|
375 | |
---|
376 | <P> |
---|
377 | </LI> |
---|
378 | <LI><SPAN CLASS="textbf">OpenMP parallel programs</SPAN>: |
---|
379 | When VampirTrace detects OpenMP flags on the command line, |
---|
380 | OPARI is invoked for automatic source code instrumentation of OpenMP events: |
---|
381 | |
---|
382 | <P> |
---|
383 | <BR> |
---|
384 | <TABLE CELLPADDING=3> |
---|
385 | <TR><TD ALIGN="LEFT">original:</TD> |
---|
386 | <TD ALIGN="LEFT"><TT>ifort -openmp pi.f -o pi </TT></TD> |
---|
387 | </TR> |
---|
388 | <TR><TD ALIGN="LEFT">with instrumentation:</TD> |
---|
389 | <TD ALIGN="LEFT"><TT>vtf77 -openmp pi.f -o pi </TT></TD> |
---|
390 | </TR> |
---|
391 | </TABLE> |
---|
392 | <BR> |
---|
393 | |
---|
394 | <P> |
---|
395 | For more information about OPARI refer to <TT>share/vampirtrace/doc/opari/Readme.html</TT> in |
---|
396 | VampirTrace's installation directory. |
---|
397 | |
---|
398 | <P> |
---|
399 | </LI> |
---|
400 | <LI><SPAN CLASS="textbf">Hybrid MPI/OpenMP parallel programs</SPAN>: |
---|
401 | With a combination of the above mentioned approaches, hybrid applications can be instrumented: |
---|
402 | |
---|
403 | <P> |
---|
404 | <BR> |
---|
405 | <TABLE CELLPADDING=3> |
---|
406 | <TR><TD ALIGN="LEFT">original:</TD> |
---|
407 | <TD ALIGN="LEFT"><TT>mpif90 -openmp hybrid.F90 -o hybrid </TT></TD> |
---|
408 | </TR> |
---|
409 | <TR><TD ALIGN="LEFT">with instrumentation:</TD> |
---|
410 | <TD ALIGN="LEFT"><TT>vtf90 -vt:f90 mpif90 -openmp </TT></TD> |
---|
411 | </TR> |
---|
412 | <TR><TD ALIGN="LEFT"> </TD> |
---|
413 | <TD ALIGN="LEFT"><TT>hybrid.F90 -o hybrid </TT></TD> |
---|
414 | </TR> |
---|
415 | </TABLE> |
---|
416 | <BR> |
---|
417 | |
---|
418 | <P> |
---|
419 | </LI> |
---|
420 | </UL> |
---|
421 | |
---|
422 | <P> |
---|
423 | The VampirTrace compiler wrappers try to detect automatically which parallelization |
---|
424 | method is used by means of the compiler flags (e.g. <TT>-openmp</TT> or <TT>-lmpi</TT>) |
---|
425 | and the compiler command (e.g. <TT>mpif90</TT>). |
---|
426 | If the compiler wrapper failed to detect this correctly, |
---|
427 | the instrumentation could be incomplete and an unsuitable |
---|
428 | VampirTrace library would be linked to the binary. |
---|
429 | In this case, you should tell the compiler wrapper which parallelization method |
---|
430 | your program uses by the switches |
---|
431 | <TT>-vt:mpi</TT>, <TT>-vt:omp</TT>, and <TT>-vt:hyb</TT> for MPI, OpenMP, and |
---|
432 | hybrid programs, respectively. |
---|
433 | Note that these switches do not change the underlying compiler or compiler flags. |
---|
434 | Use the option <TT>-vt:verbose</TT> to see the command line the compiler wrapper executes. |
---|
435 | Refer to Appendix <A HREF="#comm_wrappers">A.1</A> for a list of all compiler wrapper options. |
---|
436 | |
---|
437 | <P> |
---|
438 | The default settings of the compiler wrappers can be modified in the files |
---|
439 | <TT>share/vampirtrace/vtcc-wrapper-data.txt</TT> (and similar for the other languages) |
---|
440 | in the installation directory of VampirTrace. |
---|
441 | The settings include compilers, compiler flags, libraries, and instrumentation types. |
---|
442 | For example, you could modify the default C compiler |
---|
443 | from <TT>gcc</TT> |
---|
444 | to <TT>mpicc</TT> by changing the line <TT>compiler=gcc</TT> to <TT>compiler=mpicc</TT>. |
---|
445 | This may be convenient if you instrument MPI parallel programs only. |
---|
446 | |
---|
447 | <P> |
---|
448 | |
---|
449 | <H1><A NAME="SECTION00320000000000000000"> </A> |
---|
450 | <A NAME="instrumentation_types"> </A> |
---|
451 | <BR> |
---|
452 | Instrumentation Types |
---|
453 | </H1> |
---|
454 | |
---|
455 | <P> |
---|
456 | The wrapper's option <TT>-vt:inst &lt;insttype&gt;</TT> specifies the |
---|
457 | instrumentation type to use. The following values for <TT>&lt;insttype&gt;</TT> are possible: |
---|
458 | |
---|
459 | <P> |
---|
460 | |
---|
461 | <UL> |
---|
462 | <LI>fully-automatic instrumentation by the compiler (see Section <A HREF="#compiler_instrumentation">2.3</A>): |
---|
463 | |
---|
464 | <P> |
---|
465 | <TABLE CELLPADDING=3> |
---|
466 | <TR><TH ALIGN="LEFT"><SPAN CLASS="textbf">insttype</SPAN></TH> |
---|
467 | <TH ALIGN="LEFT"><SPAN CLASS="textbf">Compilers</SPAN></TH> |
---|
468 | </TR> |
---|
469 | <TR><TD ALIGN="LEFT">gnu</TD> |
---|
470 | <TD ALIGN="LEFT">GNU (e.g., gcc, g++, gfortran, g95)</TD> |
---|
471 | </TR> |
---|
472 | <TR><TD ALIGN="LEFT">intel</TD> |
---|
473 | <TD ALIGN="LEFT">Intel version ≥10.0 (e.g., icc, icpc, ifort)</TD> |
---|
474 | </TR> |
---|
475 | <TR><TD ALIGN="LEFT">pgi</TD> |
---|
476 | <TD ALIGN="LEFT">Portland Group (PGI) (e.g., pgcc, pgCC, pgf90, pgf77)</TD> |
---|
477 | </TR> |
---|
478 | <TR><TD ALIGN="LEFT">phat</TD> |
---|
479 | <TD ALIGN="LEFT">SUN Fortran 90 (e.g., cc, CC, f90)</TD> |
---|
480 | </TR> |
---|
481 | <TR><TD ALIGN="LEFT">xl</TD> |
---|
482 | <TD ALIGN="LEFT">IBM (e.g., xlcc, xlCC, xlf90)</TD> |
---|
483 | </TR> |
---|
484 | <TR><TD ALIGN="LEFT">ftrace</TD> |
---|
485 | <TD ALIGN="LEFT">NEC SX (e.g., sxcc, sxc++, sxf90)</TD> |
---|
486 | </TR> |
---|
487 | </TABLE> |
---|
488 | |
---|
489 | <P> |
---|
490 | </LI> |
---|
491 | <LI>manual instrumentation (needs source-code modifications): |
---|
492 | |
---|
493 | <P> |
---|
494 | <TABLE CELLPADDING=3> |
---|
495 | <TR><TH ALIGN="LEFT"><SPAN CLASS="textbf">insttype</SPAN></TH> |
---|
496 | <TH ALIGN="LEFT"><SPAN CLASS="textbf"> </SPAN></TH> |
---|
497 | </TR> |
---|
498 | <TR><TD ALIGN="LEFT">manual</TD> |
---|
499 | <TD ALIGN="LEFT">VampirTrace's API (see Section <A HREF="#A1">2.4</A>)</TD> |
---|
500 | </TR> |
---|
501 | <TR><TD ALIGN="LEFT">pomp</TD> |
---|
502 | <TD ALIGN="LEFT">POMP INST directives (see Section <A HREF="#A2">2.5</A>)</TD> |
---|
503 | </TR> |
---|
504 | </TABLE> |
---|
505 | |
---|
506 | <P> |
---|
507 | </LI> |
---|
508 | <LI>special instrumentation types (uses external tools): |
---|
509 | |
---|
510 | <P> |
---|
511 | <TABLE CELLPADDING=3> |
---|
512 | <TR><TH ALIGN="LEFT"><SPAN CLASS="textbf">insttype</SPAN></TH> |
---|
513 | <TH ALIGN="LEFT"><SPAN CLASS="textbf"> </SPAN></TH> |
---|
514 | </TR> |
---|
515 | <TR><TD ALIGN="LEFT">dyninst</TD> |
---|
516 | <TD ALIGN="LEFT">binary-instrumentation with Dyninst (Section <A HREF="#A3">2.6</A>)</TD> |
---|
517 | </TR> |
---|
518 | </TABLE> |
---|
519 | |
---|
520 | <P> |
---|
521 | </LI> |
---|
522 | </UL> |
---|
523 | |
---|
524 | <P> |
---|
525 | To determine which instrumentation type will be used by default and which others |
---|
526 | are available on your system, take a look at the entry <TT>inst_avail</TT> in the |
---|
527 | wrapper's configuration file (e.g. <TT>share/vampirtrace/vtcc-wrapper-data.txt</TT> in the |
---|
528 | installation directory of VampirTrace for the C compiler wrapper). |
---|
529 | |
---|
530 | <P> |
---|
531 | See Appendix <A HREF="#comm_wrappers">A.1</A> or type <TT>vtcc -vt:help</TT> for other |
---|
532 | options that can be passed through VampirTrace's compiler wrapper. |
---|
533 | |
---|
534 | <P> |
---|
535 | |
---|
536 | <H1><A NAME="SECTION00330000000000000000"> </A> |
---|
537 | <A NAME="compiler_instrumentation"> </A> |
---|
538 | <BR> |
---|
539 | Automatic Instrumentation |
---|
540 | </H1> |
---|
541 | |
---|
542 | <P> |
---|
543 | Automatic Instrumentation is the most convenient way to instrument your program. |
---|
544 | Simply use the compiler wrappers without any parameters, e.g.: |
---|
545 | |
---|
546 | <P> |
---|
547 | <BR> |
---|
548 | <BR> |
---|
549 | <code> % vtf90 myprog1.f90 myprog2.f90 -o myprog</code> |
---|
550 | <BR> |
---|
551 | |
---|
552 | <P> |
---|
553 | |
---|
554 | <H2><A NAME="SECTION00331000000000000000"> </A> |
---|
555 | <A NAME="bfdnotes"> </A> |
---|
556 | <BR> |
---|
557 | Notes for Using the GNU, Intel, or Pathscale Compiler |
---|
558 | </H2> |
---|
559 | For these compilers the library <SPAN CLASS="textit">BFD</SPAN> is required to get symbol information |
---|
560 | of the running application executable. This library is part of the <SPAN CLASS="textit">GNU Binutils</SPAN>, |
---|
561 | which is downloadable from <TT><A NAME="tex2html7" |
---|
562 | HREF="http://www.gnu.org/software/binutils">http://www.gnu.org/software/binutils</A></TT>. |
---|
563 | |
---|
564 | <P> |
---|
565 | To get the application executable for BFD during runtime, VampirTrace uses the <TT>/proc</TT> |
---|
566 | file system. As <TT>/proc</TT> is not present on all operating systems, automatic symbol |
---|
567 | information might not be available. In this case, it is necessary to set the environment |
---|
568 | variable <TT>VT_APPPATH</TT> to the pathname of the application executable to get symbols |
---|
569 | resolved via BFD. |
---|
570 | |
---|
571 | <P> |
---|
572 | Should any problems arise while obtaining symbol information via BFD, the environment |
---|
573 | variable <TT>VT_GNU_NMFILE</TT> can be set to a symbol list file, which is created with the |
---|
574 | command <TT>nm</TT>, like: |
---|
575 | |
---|
576 | <P> |
---|
577 | <BR> |
---|
578 | <BR> |
---|
579 | <code> % nm myprog > myprog.nm</code> |
---|
580 | <BR> |
---|
581 | <BR> |
---|
582 | <BR> |
---|
583 | Note that the output of <TT>nm</TT> must be in BSD-style format. See the manual page |
---|
584 | of nm for help on setting the output format. |
---|
585 | |
---|
586 | <P> |
---|
587 | |
---|
588 | <H2><A NAME="SECTION00332000000000000000"> |
---|
589 | Known License Issues with BFD</A> |
---|
590 | </H2> |
---|
591 | Please consider that BFD is delivered under the GNU General Public License |
---|
592 | (GPL). So if you want to build binary packages including VampirTrace make sure |
---|
593 | to use the option <TT>--without-bfd</TT> to get a version without BFD. In this |
---|
594 | case you have to use <TT>nm</TT> to get symbol information from the running |
---|
595 | application executable (see Section <A HREF="#bfdnotes">2.3.1</A>). |
---|
596 | |
---|
597 | <P> |
---|
598 | |
---|
599 | <H2><A NAME="SECTION00333000000000000000"> |
---|
600 | Notes on Instrumentation of Inline Functions</A> |
---|
601 | </H2> |
---|
602 | Compilers behave differently when they automatically instrument inlined functions. |
---|
603 | The GNU and Intel ≥10.0 compilers instrument all functions by default when they |
---|
604 | are used with VampirTrace. They therefore switch off inlining completely, disregarding |
---|
605 | the optimization level chosen. |
---|
606 | One can prevent particular functions from being instrumented by appending the |
---|
607 | following attribute to function declarations, hence |
---|
608 | making them able to be inlined (this works only for C/C++): |
---|
609 | |
---|
610 | <P> |
---|
611 | <BR> |
---|
612 | <BR> |
---|
613 | <code> __attribute__ ((__no_instrument_function__))</code> |
---|
614 | <BR> |
---|
615 | <BR> |
---|
616 | |
---|
617 | <P> |
---|
618 | The PGI and IBM compilers prefer inlining over instrumentation when compiling |
---|
619 | with inlining enabled. Thus, one needs to disable inlining to enable instrumentation |
---|
620 | of inline functions and vice versa. |
---|
621 | |
---|
622 | <P> |
---|
623 | The bottom line is that you cannot inline and instrument a function at the same time. |
---|
624 | For more information on how to inline functions read your compiler's manual. |
---|
625 | |
---|
626 | <P> |
---|
627 | |
---|
628 | <H1><A NAME="SECTION00340000000000000000"> </A> |
---|
629 | <A NAME="A1"> </A> |
---|
630 | <BR> |
---|
631 | Manual Instrumentation using the VampirTrace API |
---|
632 | </H1> |
---|
633 | |
---|
634 | <P> |
---|
635 | The <TT>VT_USER_START</TT>, <TT>VT_USER_END</TT> instrumentation calls |
---|
636 | can be used to mark any user-defined sequence of statements. |
---|
637 | |
---|
638 | <P> |
---|
639 | <PRE> |
---|
640 | Fortran: |
---|
641 | #include "vt_user.inc" |
---|
642 | VT_USER_START('name') |
---|
643 | ... |
---|
644 | VT_USER_END('name') |
---|
645 | </PRE> |
---|
646 | |
---|
647 | <P> |
---|
648 | <PRE> |
---|
649 | C: |
---|
650 | #include "vt_user.h" |
---|
651 | VT_USER_START("name"); |
---|
652 | ... |
---|
653 | VT_USER_END("name"); |
---|
654 | </PRE> |
---|
655 | If a block has several exit points (as is often the case for |
---|
656 | functions), all exit points have to be instrumented by |
---|
657 | <TT>VT_USER_END</TT>, too. |
---|
658 | |
---|
659 | <P> |
---|
660 | For C++ it is simpler, as shown in the following example. Only entry points into a |
---|
661 | scope need to be marked. Exit points are detected automatically, when C++ |
---|
662 | deletes scope-local variables. |
---|
663 | |
---|
664 | <P> |
---|
665 | <PRE> |
---|
666 | C++: |
---|
667 | #include "vt_user.h" |
---|
668 | { |
---|
669 | VT_TRACER("name"); |
---|
670 | ... |
---|
671 | } |
---|
672 | </PRE> |
---|
673 | |
---|
674 | <P> |
---|
675 | For all three languages, the instrumented sources have to be compiled |
---|
676 | with <TT>-DVTRACE</TT>; otherwise the <TT>VT_*</TT> calls are ignored. |
---|
677 | Note that Fortran source files instrumented this way have to be |
---|
678 | preprocessed, too. |
---|
679 | |
---|
680 | <P> |
---|
681 | In addition, you can combine this instrumentation type with all other ones. |
---|
682 | For example, all user functions can be instrumented by a compiler while |
---|
683 | special source code regions (e.g. loops) can be instrumented by VT's API. |
---|
684 | |
---|
685 | <P> |
---|
686 | Use VT's compiler wrapper (described above) for compiling and linking the |
---|
687 | instrumented source code, like: |
---|
688 | |
---|
689 | <UL> |
---|
690 | <LI>without other instrumentation (e.g., compiler): |
---|
691 | <BR> |
---|
692 | <BR><code> % vtcc -vt:inst manual myprog1.c -DVTRACE -o myprog</code> |
---|
693 | <BR> |
---|
694 | <BR> |
---|
695 | </LI> |
---|
696 | <LI>combined with compiler-instrumentation: |
---|
697 | <BR> |
---|
698 | <BR><code> % vtcc -vt:inst gnu myprog1.c -DVTRACE -o myprog</code> |
---|
699 | <BR> |
---|
700 | <BR> |
---|
701 | </LI> |
---|
702 | </UL> |
---|
703 | |
---|
704 | <P> |
---|
705 | Note that you can also use the option <TT>-vt:inst manual</TT> |
---|
706 | with non-instrumented sources. |
---|
707 | Binaries created this way only contain MPI and OpenMP instrumentation, |
---|
708 | which might be desirable in some cases. |
---|
709 | |
---|
710 | <P> |
---|
711 | |
---|
712 | <H1><A NAME="SECTION00350000000000000000"> </A> |
---|
713 | <A NAME="A2"> </A> |
---|
714 | <BR> |
---|
715 | Manual Instrumentation using POMP |
---|
716 | </H1> |
---|
717 | |
---|
718 | <P> |
---|
719 | POMP (OpenMP Profiling Tool) instrumentation directives are supported for |
---|
720 | Fortran and C/C++. The main advantage is that by using directives, the |
---|
721 | instrumentation is ignored during normal compilation. |
---|
722 | |
---|
723 | <P> |
---|
724 | The <TT>INST BEGIN</TT> and <TT>INST END</TT> directives can be used to mark |
---|
725 | any user-defined sequence of statements. |
---|
726 | If this block has several exit points, all but the last exit |
---|
727 | point have to be instrumented by <TT>INST ALTEND</TT>. |
---|
728 | |
---|
729 | <P> |
---|
730 | <PRE> |
---|
731 | Fortran: |
---|
732 | !POMP$ INST BEGIN(name) |
---|
733 | ... |
---|
734 | [ !POMP$ INST ALTEND(name) ] |
---|
735 | ... |
---|
736 | !POMP$ INST END(name) |
---|
737 | </PRE> |
---|
738 | |
---|
739 | <P> |
---|
740 | <PRE> |
---|
741 | C/C++: |
---|
742 | #pragma pomp inst begin(name) |
---|
743 | ... |
---|
744 | [ #pragma pomp inst altend(name) ] |
---|
745 | ... |
---|
746 | #pragma pomp inst end(name) |
---|
747 | </PRE> |
---|
748 | At least the main program function has to be instrumented in this way, and |
---|
749 | additionally, the following must be inserted as the first executable |
---|
750 | statement of the main program: |
---|
751 | |
---|
752 | <P> |
---|
753 | <PRE> |
---|
754 | Fortran: |
---|
755 | !POMP$ INST INIT |
---|
756 | </PRE> |
---|
757 | |
---|
758 | <P> |
---|
759 | <PRE> |
---|
760 | C/C++: |
---|
761 | #pragma pomp inst init |
---|
762 | </PRE> |
---|
763 | |
---|
764 | <P> |
---|
765 | |
---|
766 | <H1><A NAME="SECTION00360000000000000000"> </A> |
---|
767 | <A NAME="A3"> </A> |
---|
768 | <BR> |
---|
769 | Binary instrumentation using Dyninst |
---|
770 | </H1> |
---|
771 | |
---|
772 | <P> |
---|
773 | The option <TT>-vt:inst dyninst</TT> selects the compiler wrapper to |
---|
774 | instrument the application during run-time (binary instrumentation) by using |
---|
775 | Dyninst (<TT><A NAME="tex2html8" |
---|
776 | HREF="http://www.dyninst.org">http://www.dyninst.org</A></TT>). |
---|
777 | Recompiling is not necessary for this kind of instrumentation, |
---|
778 | but relinking is required, as shown: |
---|
779 | |
---|
780 | <P> |
---|
781 | <BR> |
---|
782 | <BR> |
---|
783 | <code> % vtf90 -vt:inst dyninst myprog1.o myprog2.o -o myprog</code> |
---|
784 | <BR> |
---|
785 | <BR> |
---|
786 | <BR> |
---|
787 | The compiler wrapper dynamically links the library <TT>libvt.dynatt.so</TT> |
---|
788 | to the application. This library attaches the <SPAN CLASS="textit">Mutator</SPAN>-program |
---|
789 | <TT>vtdyn</TT> during run-time which invokes the instrumenting by using |
---|
790 | the Dyninst-API. |
---|
791 | Note that the application should have been compiled with the <TT>-g</TT> |
---|
792 | switch in order to have symbol names visible. |
---|
793 | After a trace-run by using this way of instrumenting, the <TT>vtunify</TT> |
---|
794 | utility needs to be invoked manually (see Sections <A HREF="#unification">3.3</A> and <A HREF="#VTUNIFY">A.2</A>). |
---|
795 | |
---|
796 | <P> |
---|
797 | To prevent certain functions from being instrumented you can set |
---|
798 | the environment variable <TT>VT_DYN_BLACKLIST</TT> to a file containing |
---|
799 | a newline-separated list of function names. All additional overhead due to instrumentation |
---|
800 | of these functions will be removed. |
---|
801 | |
---|
802 | <P> |
---|
803 | VampirTrace also allows binary instrumentation of functions located in shared libraries. |
---|
804 | Ensure that the shared libraries have been compiled with <TT>-g</TT> and |
---|
805 | assign a colon-separated list of their names to |
---|
806 | the environment variable <TT>VT_DYN_SHLIBS</TT>, e.g.: |
---|
807 | |
---|
808 | <P> |
---|
809 | <BR> |
---|
810 | <BR> |
---|
811 | <code> VT_DYN_SHLIBS=libsupport.so:libmath.so</code> |
---|
812 | <BR> |
---|
813 | <BR> |
---|
814 | |
---|
815 | <P> |
---|
816 | |
---|
817 | <H1><A NAME="SECTION00400000000000000000"> |
---|
818 | Runtime Measurement</A> |
---|
819 | </H1> |
---|
820 | |
---|
821 | <P> |
---|
822 | By default, running a VampirTrace instrumented application should result in an |
---|
823 | OTF trace file in the current working directory where the application was |
---|
824 | executed. Use the environment variables <TT>VT_FILE_PREFIX</TT> and <TT>VT_PFORM_GDIR</TT> |
---|
825 | described below to change the name of the trace file and its final location. |
---|
826 | In case a problem occurs, set the environment variable <TT>VT_VERBOSE</TT> to <TT>yes</TT> before |
---|
827 | executing the instrumented application in order to see control messages of the |
---|
828 | VampirTrace run-time system which might help tracking down the problem. |
---|
829 | |
---|
830 | <P> |
---|
831 | The internal buffer of VampirTrace is limited to 32 MB. Use the environment |
---|
832 | variables <TT>VT_BUFFER_SIZE</TT> and <TT>VT_MAX_FLUSHES</TT> to increase |
---|
833 | this limit. Section <A HREF="#trace_file_size">3.2</A> contains further information on |
---|
834 | influencing trace file size. |
---|
835 | |
---|
836 | <P> |
---|
837 | |
---|
838 | <H1><A NAME="SECTION00410000000000000000"> |
---|
839 | Environment Variables</A> |
---|
840 | </H1> |
---|
841 | |
---|
842 | <P> |
---|
843 | The following environment variables can be used to control the measurement |
---|
844 | of a VampirTrace instrumented executable: |
---|
845 | |
---|
846 | <P> |
---|
847 | |
---|
848 | <P> |
---|
849 | <TABLE CELLPADDING=3> |
---|
850 | <TR><TH ALIGN="LEFT"><SPAN CLASS="textbf">Variable</SPAN></TH> |
---|
851 | <TH ALIGN="LEFT"><SPAN CLASS="textbf">Purpose</SPAN></TH> |
---|
852 | <TH ALIGN="LEFT"><SPAN CLASS="textbf">Default</SPAN></TH> |
---|
853 | </TR> |
---|
854 | <TR><TD ALIGN="LEFT"><TT>VT_PFORM_GDIR</TT></TD> |
---|
855 | <TD ALIGN="LEFT">Name of global directory to store final trace file in</TD> |
---|
856 | <TD ALIGN="LEFT"><TT>./</TT></TD> |
---|
857 | </TR> |
---|
858 | <TR><TD ALIGN="LEFT"><TT>VT_PFORM_LDIR</TT></TD> |
---|
859 | <TD ALIGN="LEFT">Name of node-local directory that can be used to store temporary trace files</TD> |
---|
860 | <TD ALIGN="LEFT"><TT>/tmp/</TT></TD> |
---|
861 | </TR> |
---|
862 | <TR><TD ALIGN="LEFT"><TT>VT_FILE_PREFIX</TT></TD> |
---|
863 | <TD ALIGN="LEFT">Prefix used for trace filenames</TD> |
---|
864 | <TD ALIGN="LEFT"><TT>a</TT></TD> |
---|
865 | </TR> |
---|
866 | <TR><TD ALIGN="LEFT"><TT>VT_APPPATH</TT></TD> |
---|
867 | <TD ALIGN="LEFT">Path to the application executable</TD> |
---|
868 | <TD ALIGN="LEFT">-</TD> |
---|
869 | </TR> |
---|
870 | <TR><TD ALIGN="LEFT"><TT>VT_BUFFER_SIZE</TT></TD> |
---|
871 | <TD ALIGN="LEFT">Size of internal event trace buffer. This is the place where |
---|
872 | event records are stored, before being written to a file.</TD> |
---|
873 | <TD ALIGN="LEFT">32M</TD> |
---|
874 | </TR> |
---|
875 | <TR><TD ALIGN="LEFT"><TT>VT_MAX_FLUSHES</TT></TD> |
---|
876 | <TD ALIGN="LEFT">Maximum number of buffer flushes</TD> |
---|
877 | <TD ALIGN="LEFT">1</TD> |
---|
878 | </TR> |
---|
879 | <TR><TD ALIGN="LEFT"><TT>VT_VERBOSE</TT></TD> |
---|
880 | <TD ALIGN="LEFT">Print VampirTrace related control information during measurement?</TD> |
---|
881 | <TD ALIGN="LEFT">no</TD> |
---|
882 | </TR> |
---|
883 | <TR><TD ALIGN="LEFT"><TT>VT_METRICS</TT></TD> |
---|
884 | <TD ALIGN="LEFT">Specify counter metrics to be recorded with trace events as a |
---|
885 | colon-separated list of names. (for details see Appendix <A HREF="#papi">B</A>)</TD> |
---|
886 | <TD ALIGN="LEFT">-</TD> |
---|
887 | </TR> |
---|
888 | <TR><TD ALIGN="LEFT"><TT>VT_MEMTRACE</TT></TD> |
---|
889 | <TD ALIGN="LEFT">Enable memory allocation counters? (see Sec. <A HREF="#mem_alloc_counters">4.2</A>)</TD> |
---|
890 | <TD ALIGN="LEFT">no</TD> |
---|
891 | </TR> |
---|
892 | <TR><TD ALIGN="LEFT"><TT>VT_IOTRACE</TT></TD> |
---|
893 | <TD ALIGN="LEFT">Enable tracing of application I/O calls? (see Sec. <A HREF="#app_io_calls">4.3</A>)</TD> |
---|
894 | <TD ALIGN="LEFT">no</TD> |
---|
895 | </TR> |
---|
896 | <TR><TD ALIGN="LEFT"><TT>VT_MPITRACE</TT></TD> |
---|
897 | <TD ALIGN="LEFT">Enable tracing of MPI events?</TD> |
---|
898 | <TD ALIGN="LEFT">yes</TD> |
---|
899 | </TR> |
---|
900 | <TR><TD ALIGN="LEFT"><TT>VT_DYN_BLACKLIST</TT></TD> |
---|
901 | <TD ALIGN="LEFT">Name of blacklist file for Dyninst instrumentation (see Section <A HREF="#A3">2.6</A>)</TD> |
---|
902 | <TD ALIGN="LEFT">-</TD> |
---|
903 | </TR> |
---|
904 | <TR><TD ALIGN="LEFT"><TT>VT_DYN_SHLIBS</TT></TD> |
---|
905 | <TD ALIGN="LEFT">Colon-separated list of shared libraries for Dyninst instrumentation (see Section <A HREF="#A3">2.6</A>)</TD> |
---|
906 | <TD ALIGN="LEFT">-</TD> |
---|
907 | </TR> |
---|
908 | <TR><TD ALIGN="LEFT"><TT>VT_FILTER_SPEC</TT></TD> |
---|
909 | <TD ALIGN="LEFT">Name of function/region filter file (see Section <A HREF="#function_filter">5.1</A>)</TD> |
---|
910 | <TD ALIGN="LEFT">-</TD> |
---|
911 | </TR> |
---|
912 | <TR><TD ALIGN="LEFT"><TT>VT_GROUPS_SPEC</TT></TD> |
---|
913 | <TD ALIGN="LEFT">Name of function grouping file |
---|
914 | (See Section <A HREF="#function_groups">5.2</A>)</TD> |
---|
915 | <TD ALIGN="LEFT">-</TD> |
---|
916 | </TR> |
---|
917 | <TR><TD ALIGN="LEFT"><TT>VT_UNIFY</TT></TD> |
---|
918 | <TD ALIGN="LEFT">Unify local trace files afterwards?</TD> |
---|
919 | <TD ALIGN="LEFT">yes</TD> |
---|
920 | </TR> |
---|
921 | <TR><TD ALIGN="LEFT"><TT>VT_COMPRESSION</TT></TD> |
---|
922 | <TD ALIGN="LEFT">Write compressed trace files?</TD> |
---|
923 | <TD ALIGN="LEFT">yes</TD> |
---|
924 | </TR> |
---|
925 | </TABLE> |
---|
926 | |
---|
927 | <P> |
---|
928 | The value for the first three variables can contain (sub)strings of the |
---|
929 | form <TT>$XYZ</TT> or <TT>${XYZ}</TT> where <TT>XYZ</TT> is the name of |
---|
930 | another environment variable. |
---|
931 | Evaluation of the environment variable is done at measurement run-time. |
---|
932 | |
---|
933 | <P> |
---|
934 | When you use these environment variables, make sure that they have the same |
---|
935 | value for all processes of your application on <SPAN CLASS="textbf">all</SPAN> nodes of your cluster. |
---|
936 | Some cluster environments do not automatically transfer your environment |
---|
937 | when executing parts of your job on remote nodes of the cluster, and you |
---|
938 | may need to explicitly set and export them in batch job submission scripts. |
---|
939 | |
---|
940 | <P> |
---|
941 | |
---|
942 | <H1><A NAME="SECTION00420000000000000000"> </A> |
---|
943 | <A NAME="trace_file_size"> </A> |
---|
944 | <BR> |
---|
945 | Influencing Trace File Size |
---|
946 | </H1> |
---|
947 | |
---|
948 | <P> |
---|
949 | The default values of the environment variables <TT>VT_BUFFER_SIZE</TT> and <BR><TT>VT_MAX_FLUSHES</TT> limit the internal buffer of VampirTrace to |
---|
950 | 32 MB and the number of times that the buffer is flushed to 1. Events that |
---|
951 | should be recorded after the limit has been reached are no longer written into |
---|
952 | the trace file. The environment variables apply to every process of a |
---|
953 | parallel application, meaning that applications with <SPAN CLASS="textit">n</SPAN> processes |
---|
954 | will typically create trace files <SPAN CLASS="textit">n</SPAN> times the size of a serial |
---|
955 | application. |
---|
956 | |
---|
957 | <P> |
---|
958 | To remove the limit and get a complete trace of an application, set <BR><TT>VT_MAX_FLUSHES</TT> to <TT>0</TT>. This causes VampirTrace to always |
---|
959 | write the buffer to disk when the buffer is full. To change the size of the |
---|
960 | buffer, use the variable <TT>VT_BUFFER_SIZE</TT>. The optimal value for |
---|
961 | this variable depends on the application that should be traced. Setting a |
---|
962 | small value will increase the memory that is available to the application but |
---|
963 | will trigger frequent buffer flushes by VampirTrace. These buffer flushes can |
---|
964 | significantly change the behavior of the application. On the other hand, |
---|
965 | setting a large value, like <TT>2G</TT>, will minimize buffer flushes by |
---|
966 | VampirTrace, but decrease the memory available to the application. If not |
---|
967 | enough memory is available to hold the VampirTrace buffer and the application |
---|
968 | data this may cause parts of the application to be swapped to disk leading |
---|
969 | also to a significant change in the behavior of the application. |
---|
970 | |
---|
971 | <P> |
---|
972 | |
---|
973 | <H1><A NAME="SECTION00430000000000000000"> </A> |
---|
974 | <A NAME="unification"> </A> |
---|
975 | <BR> |
---|
976 | Unification of local Traces |
---|
977 | </H1> |
---|
978 | |
---|
979 | <P> |
---|
980 | After a run of an instrumented application the traces of the single |
---|
981 | processes need to be <SPAN CLASS="textit">unified</SPAN> in terms of timestamps and event IDs. |
---|
982 | In most cases, this happens automatically. |
---|
983 | But under certain circumstances it is necessary to perform unification of local |
---|
984 | traces manually. To do this, use the command: |
---|
985 | |
---|
986 | <P> |
---|
987 | <BR> |
---|
988 | <BR> |
---|
989 | <code> % vtunify &lt;no-of-traces&gt; &lt;prefix&gt;</code> |
---|
990 | <BR> |
---|
991 | <BR> |
---|
992 | <BR> |
---|
993 | For example, this is required on the BlueGene/L platform or when using Dyninst |
---|
994 | instrumentation. |
---|
995 | |
---|
996 | <P> |
---|
997 | |
---|
998 | <H1><A NAME="SECTION00500000000000000000"> |
---|
999 | Recording additional Events and Counters</A> |
---|
1000 | </H1> |
---|
1001 | |
---|
1002 | <P> |
---|
1003 | |
---|
1004 | <H1><A NAME="SECTION00510000000000000000"> |
---|
1005 | PAPI Hardware Performance Counters</A> |
---|
1006 | </H1> |
---|
1007 | |
---|
1008 | <P> |
---|
1009 | If VampirTrace has been built with hardware-counter support enabled (see |
---|
1010 | Section <A HREF="#install">C</A>), VampirTrace is capable of recording hardware counter |
---|
1011 | information as part of the event records. To request the measurement of |
---|
1012 | certain counters, the user must set the environment variable <TT>VT_METRICS</TT>. |
---|
1013 | The variable should contain a colon-separated list of counter names, |
---|
1014 | or a predefined platform-specific group. |
---|
1015 | Metric names can be any PAPI preset names or PAPI native counter names. |
---|
1016 | For example, set |
---|
1017 | |
---|
1018 | <P> |
---|
1019 | <BR> |
---|
1020 | <BR> |
---|
1021 | <code> VT_METRICS=PAPI_FP_OPS:PAPI_L2_TCM</code> |
---|
1022 | <BR> |
---|
1023 | <BR> |
---|
1024 | <BR> |
---|
1025 | to record the number of floating point instructions and level 2 cache misses. |
---|
1026 | See Appendix <A HREF="#papi">B</A> for a full list of PAPI preset counters. |
---|
1027 | |
---|
1028 | <P> |
---|
1029 | The user can leave the environment variable unset to indicate that no |
---|
1030 | counters are requested. If any of the requested counters are not recognized |
---|
1031 | or the full list of counters cannot be recorded due to hardware-resource |
---|
1032 | limits, program execution will be aborted with an error message. |
---|
1033 | |
---|
1034 | <P> |
---|
1035 | |
---|
1036 | <H1><A NAME="SECTION00520000000000000000"> </A> |
---|
1037 | <A NAME="mem_alloc_counters"> </A> |
---|
1038 | <BR> |
---|
1039 | Memory Allocation Counters |
---|
1040 | </H1> |
---|
1041 | |
---|
1042 | <P> |
---|
1043 | The GNU glibc implementation provides a special hook mechanism that allows |
---|
1044 | intercepting all calls to allocation and free functions |
---|
1045 | (e.g. <TT>malloc</TT>, <TT>realloc</TT>, <TT>free</TT>). |
---|
1046 | This is independent from compilation or source code access, but relies on the |
---|
1047 | underlying system library. |
---|
1048 | |
---|
1049 | <P> |
---|
1050 | If VampirTrace has been built with memory-tracing support enabled (see |
---|
1051 | Section <A HREF="#install">C</A>), VampirTrace is capable of recording memory allocation |
---|
1052 | information as part of the event records. To request the measurement of |
---|
1053 | the application's allocated memory, the user must set the environment variable |
---|
1054 | <TT>VT_MEMTRACE</TT> to <TT>yes</TT>. |
---|
1055 | |
---|
1056 | <P> |
---|
1057 | |
---|
1058 | <H4><A NAME="SECTION00520010000000000000"> |
---|
1059 | Note:</A> |
---|
1060 | </H4> |
---|
1061 | This approach to get memory allocation information requires changing internal |
---|
1062 | function pointers in a non-thread-safe way, so VampirTrace doesn't support |
---|
1063 | memory tracing for OpenMP-parallelized programs! |
---|
1064 | |
---|
1065 | <P> |
---|
1066 | |
---|
1067 | <H1><A NAME="SECTION00530000000000000000"> </A> |
---|
1068 | <A NAME="app_io_calls"> </A> |
---|
1069 | <BR> |
---|
1070 | Application I/O Calls |
---|
1071 | </H1> |
---|
1072 | |
---|
1073 | <P> |
---|
1074 | Calls to functions which reside in external libraries can be intercepted by |
---|
1075 | implementing identical functions and linking them before the external library. |
---|
1076 | Such ``wrapper functions'' can record the parameters and return values of the |
---|
1077 | library functions. |
---|
1078 | |
---|
1079 | <P> |
---|
1080 | If VampirTrace has been built with I/O tracing support, it uses this technique |
---|
1081 | for recording calls to I/O functions of the standard C library which are |
---|
1082 | executed by the application. The following functions |
---|
1083 | are intercepted by VampirTrace: |
---|
1084 | |
---|
1085 | <P> |
---|
1086 | <TABLE CELLPADDING=3> |
---|
1087 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>open</TT></TD> |
---|
1088 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>read</TT></TD> |
---|
1089 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fdopen</TT></TD> |
---|
1090 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fread</TT></TD> |
---|
1091 | </TR> |
---|
1092 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>open64</TT></TD> |
---|
1093 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>write</TT></TD> |
---|
1094 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fopen</TT></TD> |
---|
1095 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fwrite</TT></TD> |
---|
1096 | </TR> |
---|
1097 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>creat</TT></TD> |
---|
1098 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>readv</TT></TD> |
---|
1099 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fopen64</TT></TD> |
---|
1100 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fgetc</TT></TD> |
---|
1101 | </TR> |
---|
1102 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>creat64</TT></TD> |
---|
1103 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>writev</TT></TD> |
---|
1104 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fclose</TT></TD> |
---|
1105 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>getc</TT></TD> |
---|
1106 | </TR> |
---|
1107 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>close</TT></TD> |
---|
1108 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>pread</TT></TD> |
---|
1109 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fseek</TT></TD> |
---|
1110 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fputc</TT></TD> |
---|
1111 | </TR> |
---|
1112 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>dup</TT></TD> |
---|
1113 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>pwrite</TT></TD> |
---|
1114 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fseeko</TT></TD> |
---|
1115 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>putc</TT></TD> |
---|
1116 | </TR> |
---|
1117 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>dup2</TT></TD> |
---|
1118 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>pread64</TT></TD> |
---|
1119 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fseeko64</TT></TD> |
---|
1120 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fgets</TT></TD> |
---|
1121 | </TR> |
---|
1122 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>lseek</TT></TD> |
---|
1123 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>pwrite64</TT></TD> |
---|
1124 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>rewind</TT></TD> |
---|
1125 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fputs</TT></TD> |
---|
1126 | </TR> |
---|
1127 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>lseek64</TT></TD> |
---|
1128 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110> </TD> |
---|
1129 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fsetpos</TT></TD> |
---|
1130 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fscanf</TT></TD> |
---|
1131 | </TR> |
---|
1132 | <TR><TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110> </TD> |
---|
1133 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110> </TD> |
---|
1134 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fsetpos64</TT></TD> |
---|
1135 | <TD ALIGN="LEFT" VALIGN="TOP" WIDTH=110><TT>fprintf</TT></TD> |
---|
1136 | </TR> |
---|
1137 | </TABLE> |
---|
1138 | |
---|
1139 | <P> |
---|
1140 | The gathered information will be saved |
---|
1141 | as I/O event records in the trace file. This feature has to be activated for |
---|
1142 | each tracing run by setting the environment variable <TT>VT_IOTRACE</TT> to |
---|
1143 | <TT>yes</TT>. |
---|
1144 | |
---|
1145 | <P> |
---|
1146 | |
---|
1147 | <H1><A NAME="SECTION00540000000000000000"> |
---|
1148 | User Defined Counters</A> |
---|
1149 | </H1> |
---|
1150 | |
---|
1151 | <P> |
---|
1152 | In addition to the manual instrumentation (see Section <A HREF="#A1">2.4</A>) the VampirTrace API |
---|
1153 | provides instrumentation calls which allow recording of |
---|
1154 | program variable values (e.g. iteration counts, calculation results, ...) or any other |
---|
1155 | numerical quantity. |
---|
1156 | A user defined counter is identified by its name, the counter group it belongs to, |
---|
1157 | the type of its value (integer or floating-point), and the unit in which the value is |
---|
1158 | quoted (e.g. ``GFlop/sec''). |
---|
1159 | |
---|
1160 | <P> |
---|
1161 | The <TT>VT_COUNT_GROUP_DEF</TT> and <TT>VT_COUNT_DEF</TT> instrumentation |
---|
1162 | calls can be used to define counter groups and counters: |
---|
1163 | |
---|
1164 | <P> |
---|
1165 | <PRE> |
---|
1166 | Fortran: |
---|
1167 | #include "vt_user.inc" |
---|
1168 | integer :: id, gid |
---|
1169 | VT_COUNT_GROUP_DEF('name', gid) |
---|
1170 | VT_COUNT_DEF('name', 'unit', type, gid, id) |
---|
1171 | </PRE> |
---|
1172 | |
---|
1173 | <P> |
---|
1174 | <PRE> |
---|
1175 | C/C++: |
---|
1176 | #include "vt_user.h" |
---|
1177 | unsigned int id, gid; |
---|
1178 | gid = VT_COUNT_GROUP_DEF("name"); |
---|
1179 | id = VT_COUNT_DEF("name", "unit", type, gid); |
---|
1180 | </PRE> |
---|
1181 | |
---|
1182 | <P> |
---|
1183 | The definition of a counter group is optional. If no special counter group is desired |
---|
1184 | the default group ``User'' can be used. |
---|
1185 | In this case, set the parameter <TT>gid</TT> of <TT>VT_COUNT_DEF</TT> to |
---|
1186 | <TT>VT_COUNT_DEFGROUP</TT>. |
---|
1187 | |
---|
1188 | <P> |
---|
1189 | The third parameter <TT>type</TT> of <TT>VT_COUNT_DEF</TT> specifies the data |
---|
1190 | type of the counter value. To record a value for any of the defined counters the |
---|
1191 | corresponding instrumentation call <TT>VT_COUNT_*_VAL</TT> must be invoked. |
---|
1192 | |
---|
1193 | <P> |
---|
1194 | |
---|
1195 | <P> |
---|
1196 | <TABLE CELLPADDING=3> |
---|
1197 | <TR><TH ALIGN="LEFT"><SPAN CLASS="textbf">Fortran:</SPAN></TH> |
---|
1198 | <TD ALIGN="LEFT"> </TD> |
---|
1199 | <TD ALIGN="LEFT"> </TD> |
---|
1200 | </TR> |
---|
1201 | <TR><TH ALIGN="LEFT"><SPAN CLASS="textbf">Type</SPAN></TH> |
---|
1202 | <TD ALIGN="LEFT"><SPAN CLASS="textbf">Count call</SPAN></TD> |
---|
1203 | <TD ALIGN="LEFT"><SPAN CLASS="textbf">Data type</SPAN></TD> |
---|
1204 | </TR> |
---|
1205 | <TR><TD ALIGN="LEFT"><TT>VT_COUNT_TYPE_INTEGER</TT></TD> |
---|
1206 | <TD ALIGN="LEFT"><TT>VT_COUNT_INTEGER_VAL</TT></TD> |
---|
1207 | <TD ALIGN="LEFT">integer (4 byte)</TD> |
---|
1208 | </TR> |
---|
1209 | <TR><TD ALIGN="LEFT"><TT>VT_COUNT_TYPE_INTEGER8</TT></TD> |
---|
1210 | <TD ALIGN="LEFT"><TT>VT_COUNT_INTEGER8_VAL</TT></TD> |
---|
1211 | <TD ALIGN="LEFT">integer (8 byte)</TD> |
---|
1212 | </TR> |
---|
1213 | <TR><TD ALIGN="LEFT"><TT>VT_COUNT_TYPE_REAL</TT></TD> |
---|
1214 | <TD ALIGN="LEFT"><TT>VT_COUNT_REAL_VAL</TT></TD> |
---|
1215 | <TD ALIGN="LEFT">real</TD> |
---|
1216 | </TR> |
---|
1217 | <TR><TD ALIGN="LEFT"><TT>VT_COUNT_TYPE_DOUBLE</TT></TD> |
---|
1218 | <TD ALIGN="LEFT"><TT>VT_COUNT_DOUBLE_VAL</TT></TD> |
---|
1219 | <TD ALIGN="LEFT">double precision</TD> |
---|
1220 | </TR> |
---|
1221 | </TABLE> |
---|
1222 | |
---|
1223 | <P> |
---|
1224 | <TABLE CELLPADDING=3> |
---|
1225 | <TR><TH ALIGN="LEFT"><SPAN CLASS="textbf">C/C++:</SPAN></TH> |
---|
1226 | <TD ALIGN="LEFT"> </TD> |
---|
1227 | <TD ALIGN="LEFT"> </TD> |
---|
1228 | </TR> |
---|
1229 | <TR><TH ALIGN="LEFT"><SPAN CLASS="textbf">Type</SPAN></TH> |
---|
1230 | <TD ALIGN="LEFT"><SPAN CLASS="textbf">Count call</SPAN></TD> |
---|
1231 | <TD ALIGN="LEFT"><SPAN CLASS="textbf">Data type</SPAN></TD> |
---|
1232 | </TR> |
---|
1233 | <TR><TD ALIGN="LEFT"><TT>VT_COUNT_TYPE_SIGNED</TT></TD> |
---|
1234 | <TD ALIGN="LEFT"><TT>VT_COUNT_SIGNED_VAL</TT></TD> |
---|
1235 | <TD ALIGN="LEFT">signed int (max. 64-bit)</TD> |
---|
1236 | </TR> |
---|
1237 | <TR><TD ALIGN="LEFT"><TT>VT_COUNT_TYPE_UNSIGNED</TT></TD> |
---|
1238 | <TD ALIGN="LEFT"><TT>VT_COUNT_UNSIGNED_VAL</TT></TD> |
---|
1239 | <TD ALIGN="LEFT">unsigned int (max. 64-bit)</TD> |
---|
1240 | </TR> |
---|
1241 | <TR><TD ALIGN="LEFT"><TT>VT_COUNT_TYPE_FLOAT</TT></TD> |
---|
1242 | <TD ALIGN="LEFT"><TT>VT_COUNT_FLOAT_VAL</TT></TD> |
---|
1243 | <TD ALIGN="LEFT">float</TD> |
---|
1244 | </TR> |
---|
1245 | <TR><TD ALIGN="LEFT"><TT>VT_COUNT_TYPE_DOUBLE</TT></TD> |
---|
1246 | <TD ALIGN="LEFT"><TT>VT_COUNT_DOUBLE_VAL</TT></TD> |
---|
1247 | <TD ALIGN="LEFT">double</TD> |
---|
1248 | </TR> |
---|
1249 | </TABLE> |
---|
1250 | |
---|
1251 | <P> |
---|
1252 | The following example records the loop index <TT>i</TT>: |
---|
1253 | |
---|
1254 | <P> |
---|
1255 | <PRE> |
---|
1256 | Fortran: |
---|
1257 | |
---|
1258 | #include "vt_user.inc" |
---|
1259 | |
---|
1260 | program main |
---|
1261 | integer :: i, cid, cgid |
---|
1262 | |
---|
1263 | VT_COUNT_GROUP_DEF('loopindex', cgid) |
---|
1264 | VT_COUNT_DEF('i', '#', VT_COUNT_TYPE_INTEGER, cgid, cid) |
---|
1265 | |
---|
1266 | do i=1,100 |
---|
1267 | VT_COUNT_INTEGER_VAL(cid, i) |
---|
1268 | end do |
---|
1269 | |
---|
1270 | end program main |
---|
1271 | </PRE> |
---|
1272 | |
---|
1273 | <P> |
---|
1274 | <PRE> |
---|
1275 | C/C++: |
---|
1276 | |
---|
1277 | #include "vt_user.h" |
---|
1278 | |
---|
1279 | int main() { |
---|
1280 | unsigned int i, cid, cgid; |
---|
1281 | |
---|
1282 | cgid = VT_COUNT_GROUP_DEF("loopindex"); |
---|
1283 | cid = VT_COUNT_DEF("i", "#", VT_COUNT_TYPE_UNSIGNED, |
---|
1284 | cgid); |
---|
1285 | |
---|
1286 | for( i = 1; i &lt;= 100; i++ ) { |
---|
1287 | VT_COUNT_UNSIGNED_VAL(cid, i); |
---|
1288 | } |
---|
1289 | |
---|
1290 | return 0; |
---|
1291 | } |
---|
1292 | </PRE> |
---|
1293 | |
---|
1294 | <P> |
---|
1295 | For all three languages the instrumented sources have to be compiled |
---|
1296 | with <TT>-DVTRACE</TT>. Otherwise the <TT>VT_*</TT> calls are ignored. |
---|
1297 | If additionally any functions or regions are manually instrumented by VT's API |
---|
1298 | (see Section <A HREF="#A1">2.4</A>) and only the instrumentation calls for user defined |
---|
1299 | counters should be disabled, then the sources have to be compiled with |
---|
1300 | <TT>-DVTRACE_NO_COUNT</TT>, too. |
---|
1301 | |
---|
1302 | <P> |
---|
1303 | |
---|
1304 | <H1><A NAME="SECTION00600000000000000000"> |
---|
1305 | Filtering &amp; Grouping</A> |
---|
1306 | </H1> |
---|
1307 | |
---|
1308 | <P> |
---|
1309 | |
---|
1310 | <H1><A NAME="SECTION00610000000000000000"> </A> |
---|
1311 | <A NAME="function_filter"> </A> |
---|
1312 | <BR> |
---|
1313 | Function Filtering |
---|
1314 | </H1> |
---|
1315 | |
---|
1316 | <P> |
---|
1317 | By default, all calls of instrumented functions will be traced, so that the |
---|
1318 | resulting trace files can easily become very large. In order to decrease the |
---|
1319 | size of a trace, VampirTrace allows the specification of filter directives |
---|
1320 | before running an instrumented application. |
---|
1321 | The user can decide on how often an instrumented function/region is to be |
---|
1322 | recorded to a trace file. |
---|
1323 | To use a filter, the environment variable <TT>VT_FILTER_SPEC</TT> needs to be |
---|
1324 | defined. It should contain the path and name of a file with filter directives. |
---|
1325 | |
---|
1326 | <P> |
---|
1327 | Below, there is an example of a file containing filter directives: |
---|
1328 | |
---|
1329 | <P> |
---|
1330 | <PRE> |
---|
1331 | # VampirTrace region filter specification |
---|
1332 | # |
---|
1333 | # call limit definitions and region assignments |
---|
1334 | # |
---|
1335 | # syntax: &lt;regions&gt; -- &lt;limit&gt; |
---|
1336 | # |
---|
1337 | # regions semicolon-separated list of regions |
---|
1338 | # (can be wildcards) |
---|
1339 | # limit assigned call limit |
---|
1340 | # 0 = region(s) denied |
---|
1341 | # -1 = unlimited |
---|
1342 | # |
---|
1343 | add;sub;mul;div -- 1000 |
---|
1344 | * -- 3000000 |
---|
1345 | </PRE> |
---|
1346 | |
---|
1347 | <P> |
---|
1348 | These region filter directives cause the functions <TT>add</TT>, |
---|
1349 | <TT>sub</TT>, <TT>mul</TT> and <TT>div</TT> to be recorded at most 1000 times. |
---|
1350 | The remaining functions <TT>*</TT> will be recorded at most 3000000 times. |
---|
1351 | |
---|
1352 | <P> |
---|
1353 | Besides creating filter files by hand, you can also use the <TT>vtfilter</TT> |
---|
1354 | tool to generate them automatically. This tool reads the provided trace |
---|
1355 | and decides whether a function should be filtered or not, based on the evaluation of |
---|
1356 | certain parameters. For more information see Section <A HREF="#VTFILTER">A.4</A>. |
---|
1357 | |
---|
1358 | <P> |
---|
1359 | |
---|
1360 | <H1><A NAME="SECTION00620000000000000000"> </A> |
---|
1361 | <A NAME="function_groups"> </A> |
---|
1362 | <BR> |
---|
1363 | Function Grouping |
---|
1364 | </H1> |
---|
1365 | |
---|
1366 | <P> |
---|
1367 | VampirTrace allows assigning functions/regions to a group. |
---|
1368 | Groups can, for instance, be highlighted by different colors in Vampir displays. |
---|
1369 | The following standard groups are created by VampirTrace: |
---|
1370 | |
---|
1371 | <P> |
---|
1372 | <TABLE CELLPADDING=3> |
---|
1373 | <TR><TH ALIGN="LEFT"><SPAN CLASS="textbf">Group name</SPAN></TH> |
---|
1374 | <TH ALIGN="LEFT"><SPAN CLASS="textbf">Contained functions/regions</SPAN></TH> |
---|
1375 | </TR> |
---|
1376 | <TR><TD ALIGN="LEFT"><TT>MPI</TT></TD> |
---|
1377 | <TD ALIGN="LEFT">MPI functions</TD> |
---|
1378 | </TR> |
---|
1379 | <TR><TD ALIGN="LEFT"><TT>OMP</TT></TD> |
---|
1380 | <TD ALIGN="LEFT">OpenMP constructs and functions</TD> |
---|
1381 | </TR> |
---|
1382 | <TR><TD ALIGN="LEFT"><TT>MEM</TT></TD> |
---|
1383 | <TD ALIGN="LEFT">Memory allocation functions (see <A HREF="#mem_alloc_counters">4.2</A>)</TD> |
---|
1384 | </TR> |
---|
1385 | <TR><TD ALIGN="LEFT"><TT>I/O</TT></TD> |
---|
1386 | <TD ALIGN="LEFT">I/O functions (see <A HREF="#app_io_calls">4.3</A>)</TD> |
---|
1387 | </TR> |
---|
1388 | <TR><TD ALIGN="LEFT"><TT>Application</TT></TD> |
---|
1389 | <TD ALIGN="LEFT">remaining instrumented functions and source code regions</TD> |
---|
1390 | </TR> |
---|
1391 | </TABLE> |
---|
1392 | |
---|
1393 | <P> |
---|
1394 | Additionally, you can create your own groups, e.g. to better distinguish |
---|
1395 | different phases of an application. |
---|
1396 | To use function/region grouping set the environment variable |
---|
1397 | <TT>VT_GROUPS_SPEC</TT> to the path of a file which contains the group |
---|
1398 | assignments. |
---|
1399 | Below, there is an example of how to use group assignments: |
---|
1400 | |
---|
1401 | <P> |
---|
1402 | <PRE> |
---|
1403 | # VampirTrace region groups specification |
---|
1404 | # |
---|
1405 | # group definitions and region assignments |
---|
1406 | # |
---|
1407 | # syntax: <group>=<regions> |
---|
1408 | # |
---|
1409 | # group group name |
---|
1410 | # regions semicolon-separated list of regions |
---|
1411 | # (can be wildcards) |
---|
1412 | # |
---|
1413 | CALC=add;sub;mul;div |
---|
1414 | USER=app_* |
---|
1415 | </PRE> |
---|
1416 | |
---|
1417 | <P> |
---|
1418 | These group assignments associate the functions <TT>add</TT>, <TT>sub</TT>, |
---|
1419 | <TT>mul</TT> and <TT>div</TT> with group "CALC", and all functions |
---|
1420 | with the prefix <TT>app_</TT> with group "USER". |
---|
1421 | |
---|
1422 | <P> |
---|
1423 | |
---|
1424 | <P> |
---|
1425 | |
---|
1426 | <H1><A NAME="SECTION00700000000000000000"> |
---|
1427 | Command Reference</A> |
---|
1428 | </H1> |
---|
1429 | |
---|
1430 | <H1><A NAME="SECTION00710000000000000000"> </A> |
---|
1431 | <A NAME="comm_wrappers"> </A> |
---|
1432 | <BR> |
---|
1433 | Compiler Wrappers (vtcc,vtcxx,vtf77,vtf90) |
---|
1434 | </H1> |
---|
1435 | |
---|
1436 | <P> |
---|
1437 | <PRE> |
---|
1438 | vtcc,vtcxx,vtf77,vtf90 - compiler wrappers for C, C++, |
---|
1439 | Fortran 77, Fortran 90 |
---|
1440 | |
---|
1441 | Syntax: vt<cc|cxx|f77|f90> [-vt:<cc|cxx|f77|f90> <cmd>] |
---|
1442 | [-vt:inst <insttype>] [-vt:<seq|mpi|omp|hyb>] |
---|
1443 | [-vt:opari <args>] [-vt:verbose] [-vt:version] |
---|
1444 | [-vt:showme] [-vt:showme_compile] |
---|
1445 | [-vt:showme_link] ... |
---|
1446 | |
---|
1447 | options: |
---|
1448 | -vt:help Show this help message. |
---|
1449 | -vt:<cc|cxx|f77|f90> <cmd> |
---|
1450 | Set the underlying compiler command. |
---|
1451 | |
---|
1452 | -vt:inst <insttype> Set the instrumentation type. |
---|
1453 | |
---|
1454 | possible values: |
---|
1455 | |
---|
1456 | gnu fully-automatic by GNU compiler |
---|
1457 | intel ... Intel (version >= 10.x) ... |
---|
1458 | pgi ... Portland Group (PGI) ... |
---|
1459 | phat ... SUN Fortran 90 ... |
---|
1460 | xl ... IBM ... |
---|
1461 | ftrace ... NEC SX ... |
---|
1462 | manual manual by using VampirTrace's API |
---|
1463 | pomp manual by using POMP INST directives |
---|
1464 | dyninst binary by using Dyninst (www.dyninst.org) |
---|
1465 | |
---|
1466 | -vt:opari <args> Set options for OPARI command. (see |
---|
1467 | share/vampirtrace/doc/opari/Readme.html) |
---|
1468 | |
---|
1469 | -vt:<seq|mpi|omp|hyb> |
---|
1470 | Force application's parallelization type. |
---|
1471 | Necessary, if this cannot be determined |
---|
1472 | by underlying compiler and flags. |
---|
1473 | seq = sequential |
---|
1474 | mpi = parallel (uses MPI) |
---|
1475 | omp = parallel (uses OpenMP) |
---|
1476 | hyb = hybrid parallel (MPI + OpenMP) |
---|
1477 | (default: automatically determined by |
---|
1478 | underlying compiler and flags) |
---|
1479 | |
---|
1480 | -vt:verbose Enable verbose mode. |
---|
1481 | |
---|
1482 | -vt:showme Do not invoke the underlying compiler. |
---|
1483 | Instead, show the command line that |
---|
1484 | would be executed. |
---|
1485 | |
---|
1486 | -vt:showme_compile Do not invoke the underlying compiler. |
---|
1487 | Instead, show the compiler flags that |
---|
1488 | would be supplied to the compiler. |
---|
1489 | |
---|
1490 | -vt:showme_link Do not invoke the underlying compiler. |
---|
1491 | Instead, show the linker flags that |
---|
1492 | would be supplied to the compiler. |
---|
1493 | |
---|
1494 | See the man page for your underlying compiler for other |
---|
1495 | options that can be passed through 'vt<cc|cxx|f77|f90>'. |
---|
1496 | |
---|
1497 | Environment variables: |
---|
1498 | VT_CC Equivalent to '-vt:cc' |
---|
1499 | VT_CXX Equivalent to '-vt:cxx' |
---|
1500 | VT_F77 Equivalent to '-vt:f77' |
---|
1501 | VT_F90 Equivalent to '-vt:f90' |
---|
1502 | VT_INST Equivalent to '-vt:inst' |
---|
1503 | |
---|
1504 | The corresponding command line options override the |
---|
1505 | environment variable settings. |
---|
1506 | |
---|
1507 | Examples: |
---|
1508 | automatic instrumentation by using GNU compiler: |
---|
1509 | |
---|
1510 | vtcc -vt:cc gcc -vt:inst gnu -c foo.c -o foo.o |
---|
1511 | vtcc -vt:cc gcc -vt:inst gnu -c bar.c -o bar.o |
---|
1512 | vtcc -vt:cc gcc -vt:inst gnu foo.o bar.o -o foo |
---|
1513 | |
---|
1514 | manual instrumentation by using VT's API: |
---|
1515 | |
---|
1516 | vtf90 -vt:inst manual foobar.F90 -o foobar -DVTRACE |
---|
1517 | |
---|
1518 | IMPORTANT: Fortran source files instrumented by VT's API or |
---|
1519 | POMP directives have to be preprocessed by CPP. |
---|
1520 | </PRE> |
---|
1521 | |
---|
1522 | <P> |
---|
1523 | |
---|
1524 | <H1><A NAME="SECTION00720000000000000000"> </A> |
---|
1525 | <A NAME="VTUNIFY"> </A> |
---|
1526 | <BR> |
---|
1527 | Local Trace Unifier (vtunify) |
---|
1528 | </H1> |
---|
1529 | |
---|
1530 | <P> |
---|
1531 | <PRE> |
---|
1532 | vtunify - local trace unifier for VampirTrace. |
---|
1533 | |
---|
1534 | Syntax: vtunify <#files> <iprefix> [-o <oprefix>] |
---|
1535 | [-s <statsofile>] [-q|--noshowstats] [-c|--nocompress] |
---|
1536 | [-k|--keeplocal] [-v|--verbose] |
---|
1537 | |
---|
1538 | Options: |
---|
1539 | -h, --help Show this help message. |
---|
1540 | |
---|
1541 | #files number of local trace files |
---|
1542 | (equal to # of '*.uctl' files) |
---|
1543 | |
---|
1544 | iprefix prefix of input trace filename. |
---|
1545 | |
---|
1546 | -o <oprefix> prefix of output trace filename. |
---|
1547 | |
---|
1548 | -s <statsofile> statistics output filename |
---|
1549 | default=<oprefix>.stats |
---|
1550 | |
---|
1551 | -q, --noshowstats Don't show statistics on stdout. |
---|
1552 | |
---|
1553 | -c, --nocompress Don't compress output trace files. |
---|
1554 | |
---|
1555 | -k, --keeplocal Don't remove input trace files. |
---|
1556 | |
---|
1557 | -v, --verbose Enable verbose mode. |
---|
1558 | </PRE> |
---|
1559 | |
---|
1560 | <P> |
---|
1561 | |
---|
1562 | <H1><A NAME="SECTION00730000000000000000"> </A> |
---|
1563 | <A NAME="VTDYN"> </A> |
---|
1564 | <BR> |
---|
1565 | Dyninst Mutator (vtdyn) |
---|
1566 | </H1> |
---|
1567 | |
---|
1568 | <P> |
---|
1569 | <PRE> |
---|
1570 | vtdyn - Dyninst Mutator for VampirTrace. |
---|
1571 | |
---|
1572 | Syntax: vtdyn [-v|--verbose] [-s|--shlib <shlib>[,...]] |
---|
1573 | [-b|--blacklist <bfile>] [-p|--pid <pid>] |
---|
1574 | <app> [appargs ...] |
---|
1575 | |
---|
1576 | Options: |
---|
1577 | -h, --help Show this help message. |
---|
1578 | |
---|
1579 | -v, --verbose Enable verbose mode. |
---|
1580 | |
---|
1581 | -s, --shlib Comma-separated list of shared libraries |
---|
1582 | <shlib>[,...] which should also be instrumented. |
---|
1583 | |
---|
1584 | -b, --blacklist Set path of blacklist file containing |
---|
1585 | <bfile> a newline-separated list of functions |
---|
1586 | which should not be instrumented. |
---|
1587 | |
---|
1588 | -p, --pid <pid> application's process id |
---|
1589 | (attaches the mutator to a running process) |
---|
1590 | |
---|
1591 | app path of application executable |
---|
1592 | |
---|
1593 | appargs application's arguments |
---|
1594 | </PRE> |
---|
1595 | |
---|
1596 | <P> |
---|
1597 | |
---|
1598 | <H1><A NAME="SECTION00740000000000000000"> </A> |
---|
1599 | <A NAME="VTFILTER"> </A> |
---|
1600 | <BR> |
---|
1601 | Trace Filter Tool (vtfilter) |
---|
1602 | </H1> |
---|
1603 | |
---|
1604 | <P> |
---|
1605 | <PRE> |
---|
1606 | vtfilter - filter generator for VampirTrace |
---|
1607 | |
---|
1608 | Syntax: |
---|
1609 | Filter a trace file using an already existing filter file: |
---|
1610 | vtfilter -filt [filt-options] <input trace file> |
---|
1611 | Generate a filter: |
---|
1612 | vtfilter -gen [gen-options] <input trace file> |
---|
1613 | |
---|
1614 | general options: |
---|
1615 | -h, --help show this help message |
---|
1616 | -p show progress |
---|
1617 | |
---|
1618 | filt-options: |
---|
1619 | -to <file> output trace file name |
---|
1620 | |
---|
1621 | -fi <file> input filter file name |
---|
1622 | |
---|
1623 | -z <zlevel> Set the compression level. Level |
---|
1624 | ranges from 0 to 9 where 0 is no |
---|
1625 | compression and 9 is the highest |
---|
1626 | level. Standard is 4. |
---|
1627 | |
---|
1628 | -f <n> Set max number of file handles |
---|
1629 | available. Standard is 256. |
---|
1630 | |
---|
1631 | gen-options: |
---|
1632 | -fo <file> output filter file name |
---|
1633 | |
---|
1634 | -r <n> Reduce the trace size to <n> percent |
---|
1635 | of the original size. The program |
---|
1636 | relies on the fact that the major |
---|
1637 | part of the trace are function calls. |
---|
1638 | The approximation of size will get |
---|
1639 | worse with a rising percentage of |
---|
1640 | communication and other non function |
---|
1641 | calling or performance counter |
---|
1642 | records. |
---|
1643 | |
---|
1644 | -l <n> Limit the number of accepted |
---|
1645 | function calls for filtered functions |
---|
1646 | to <n>. Standard is 0. |
---|
1647 | |
---|
1648 | -ex <f>,<f>,... Exclude certain symbols from |
---|
1649 | filtering. A symbol may contain |
---|
1650 | wildcards. |
---|
1651 | |
---|
1652 | -in <f>,<f>,... Force to include certain symbols |
---|
1653 | into the filter. A symbol may contain |
---|
1654 | wildcards. |
---|
1655 | |
---|
1656 | -inc Automatically include children of |
---|
1657 | included functions as well into the |
---|
1658 | filter. |
---|
1659 | |
---|
1660 | -stats Prints out the desired and the |
---|
1661 | expected percentage of file size. |
---|
1662 | |
---|
1663 | |
---|
1664 | environment variables: |
---|
1665 | TRACEFILTER_EXCLUDEFILE Specifies a file containing a list |
---|
1666 | of symbols not to be filtered. The |
---|
1667 | list of members can be separated |
---|
1668 | by space, comma, tab, newline and |
---|
1669 | may contain wildcards. |
---|
1670 | |
---|
1671 | TRACEFILTER_INCLUDEFILE Specifies a file containing a list |
---|
1672 | of symbols to be filtered. |
---|
1673 | </PRE> |
---|
1674 | |
---|
1675 | <P> |
---|
1676 | |
---|
1677 | <H1><A NAME="SECTION00800000000000000000"> </A> |
---|
1678 | <A NAME="papi"> </A> |
---|
1679 | <BR> |
---|
1680 | PAPI Counter Specifications |
---|
1681 | </H1> |
---|
1682 | |
---|
1683 | <P> |
---|
1684 | Available counter names can be queried with the PAPI commands |
---|
1685 | <TT>papi_avail</TT> and <TT>papi_native_avail</TT>. |
---|
1686 | There are limitations to the combinations of counters. To check |
---|
1687 | whether your choice works properly, use the command |
---|
1688 | <BR><TT>papi_event_chooser</TT>. |
---|
1689 | |
---|
1690 | <P> |
---|
1691 | <PRE> |
---|
1692 | PAPI_L[1|2|3]_[D|I|T]C[M|H|A|R|W] |
---|
1693 | Level 1/2/3 data/instruction/total cache |
---|
1694 | misses/hits/accesses/reads/writes |
---|
1695 | |
---|
1696 | PAPI_L[1|2|3]_[LD|ST]M |
---|
1697 | Level 1/2/3 load/store misses |
---|
1698 | |
---|
1699 | PAPI_CA_SNP Requests for a snoop |
---|
1700 | PAPI_CA_SHR Requests for exclusive access to shared cache line |
---|
1701 | PAPI_CA_CLN Requests for exclusive access to clean cache line |
---|
1702 | PAPI_CA_INV Requests for cache line invalidation |
---|
1703 | PAPI_CA_ITV Requests for cache line intervention |
---|
1704 | |
---|
1705 | PAPI_BRU_IDL Cycles branch units are idle |
---|
1706 | PAPI_FXU_IDL Cycles integer units are idle |
---|
1707 | PAPI_FPU_IDL Cycles floating point units are idle |
---|
1708 | PAPI_LSU_IDL Cycles load/store units are idle |
---|
1709 | |
---|
1710 | PAPI_TLB_DM Data translation lookaside buffer misses |
---|
1711 | PAPI_TLB_IM Instruction translation lookaside buffer misses |
---|
1712 | PAPI_TLB_TL Total translation lookaside buffer misses |
---|
1713 | |
---|
1714 | PAPI_BTAC_M Branch target address cache misses |
---|
1715 | PAPI_PRF_DM Data prefetch cache misses |
---|
1716 | PAPI_TLB_SD Translation lookaside buffer shootdowns |
---|
1717 | |
---|
1718 | PAPI_CSR_FAL Failed store conditional instructions |
---|
1719 | PAPI_CSR_SUC Successful store conditional instructions |
---|
1720 | PAPI_CSR_TOT Total store conditional instructions |
---|
1721 | |
---|
1722 | PAPI_MEM_SCY Cycles Stalled Waiting for memory accesses |
---|
1723 | PAPI_MEM_RCY Cycles Stalled Waiting for memory Reads |
---|
1724 | PAPI_MEM_WCY Cycles Stalled Waiting for memory writes |
---|
1725 | |
---|
1726 | PAPI_STL_ICY Cycles with no instruction issue |
---|
1727 | PAPI_FUL_ICY Cycles with maximum instruction issue |
---|
1728 | PAPI_STL_CCY Cycles with no instructions completed |
---|
1729 | PAPI_FUL_CCY Cycles with maximum instructions completed |
---|
1730 | |
---|
1731 | PAPI_BR_UCN Unconditional branch instructions |
---|
1732 | PAPI_BR_CN Conditional branch instructions |
---|
1733 | PAPI_BR_TKN Conditional branch instructions taken |
---|
1734 | PAPI_BR_NTK Conditional branch instructions not taken |
---|
1735 | PAPI_BR_MSP Conditional branch instructions mispredicted |
---|
1736 | PAPI_BR_PRC Conditional branch instructions correctly predicted |
---|
1737 | |
---|
1738 | PAPI_FMA_INS FMA instructions completed |
---|
1739 | PAPI_TOT_IIS Instructions issued |
---|
1740 | PAPI_TOT_INS Instructions completed |
---|
1741 | PAPI_INT_INS Integer instructions |
---|
1742 | PAPI_FP_INS Floating point instructions |
---|
1743 | PAPI_LD_INS Load instructions |
---|
1744 | PAPI_SR_INS Store instructions |
---|
1745 | PAPI_BR_INS Branch instructions |
---|
1746 | PAPI_VEC_INS Vector/SIMD instructions |
---|
1747 | PAPI_LST_INS Load/store instructions completed |
---|
1748 | PAPI_SYC_INS Synchronization instructions completed |
---|
1749 | PAPI_FML_INS Floating point multiply instructions |
---|
1750 | PAPI_FAD_INS Floating point add instructions |
---|
1751 | PAPI_FDV_INS Floating point divide instructions |
---|
1752 | PAPI_FSQ_INS Floating point square root instructions |
---|
1753 | PAPI_FNV_INS Floating point inverse instructions |
---|
1754 | |
---|
1755 | PAPI_RES_STL Cycles stalled on any resource |
---|
1756 | PAPI_FP_STAL Cycles the FP unit(s) are stalled |
---|
1757 | |
---|
1758 | PAPI_FP_OPS Floating point operations |
---|
1759 | PAPI_TOT_CYC Total cycles |
---|
1760 | PAPI_HW_INT Hardware interrupts |
---|
1761 | </PRE> |
---|
1762 | |
---|
1763 | <P> |
---|
1764 | |
---|
1765 | <H1><A NAME="SECTION00900000000000000000"> </A> |
---|
1766 | <A NAME="install"> </A> |
---|
1767 | <BR> |
---|
1768 | VampirTrace Installation |
---|
1769 | </H1> |
---|
1770 | |
---|
1771 | <P> |
---|
1772 | |
---|
1773 | <H1><A NAME="SECTION00910000000000000000"> |
---|
1774 | Basics</A> |
---|
1775 | </H1> |
---|
1776 | |
---|
1777 | <P> |
---|
1778 | Building VampirTrace is typically a combination of running <TT>configure</TT> |
---|
1779 | and |
---|
1780 | <BR><TT>make</TT>. Execute the following commands to install VampirTrace from |
---|
1781 | within the directory at the top of the tree: |
---|
1782 | |
---|
1783 | <P> |
---|
1784 | <PRE> |
---|
1785 | % ./configure --prefix=/where/to/install |
---|
1786 | [...lots of output...] |
---|
1787 | % make all install |
---|
1788 | </PRE> |
---|
1789 | |
---|
1790 | <P> |
---|
1791 | If you need special access for installing, then you can execute |
---|
1792 | <TT>make all</TT> as a user with write permissions in the build tree, and a |
---|
1793 | separate <TT>make install</TT> as a user with write permissions to the |
---|
1794 | install tree. |
---|
1795 | |
---|
1796 | <P> |
---|
1797 | However, for more details, also read the following instructions. Sometimes |
---|
1798 | it might be necessary to provide <TT>./configure</TT> with options, e.g. |
---|
1799 | specifications of paths or compilers. Please consult the CONFIG-EXAMPLES file to |
---|
1800 | get an idea of how to configure VampirTrace for your platform. |
---|
1801 | |
---|
1802 | <P> |
---|
1803 | VampirTrace comes with example programs written in C, C++, and Fortran. |
---|
1804 | They can be used to test different instrumentation types of the |
---|
1805 | VampirTrace installation. |
---|
1806 | You can find them in the directory <TT>examples</TT> of the VampirTrace package. |
---|
1807 | |
---|
1808 | <P> |
---|
1809 | |
---|
1810 | <H1><A NAME="SECTION00920000000000000000"> |
---|
1811 | Configure Options</A> |
---|
1812 | </H1> |
---|
1813 | |
---|
1814 | <P> |
---|
1815 | |
---|
1816 | <H2><A NAME="SECTION00921000000000000000"> |
---|
1817 | Compilers and Options</A> |
---|
1818 | </H2> |
---|
1819 | |
---|
1820 | <P> |
---|
1821 | Some systems require unusual options for compiling or linking that |
---|
1822 | the |
---|
1823 | <BR><TT>configure</TT> script does not know about. Run <TT>./configure --help</TT> |
---|
1824 | for details on some of the pertinent environment variables. |
---|
1825 | |
---|
1826 | <P> |
---|
1827 | You can pass initial values for configuration parameters to <TT>configure</TT> |
---|
1828 | by setting variables in the command line or in the environment. Here |
---|
1829 | is an example: |
---|
1830 | |
---|
1831 | <P> |
---|
1832 | <PRE> |
---|
1833 | % ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix |
---|
1834 | </PRE> |
---|
1835 | |
---|
1836 | <P> |
---|
1837 | |
---|
1838 | <H2><A NAME="SECTION00922000000000000000"> |
---|
1839 | Installation Names</A> |
---|
1840 | </H2> |
---|
1841 | |
---|
1842 | <P> |
---|
1843 | By default, <TT>make install</TT> will install the package's files in |
---|
1844 | <TT>/usr/local/bin</TT>, <TT>/usr/local/include</TT>, etc. You can specify an |
---|
1845 | installation prefix other than <TT>/usr/local</TT> by giving <TT>configure</TT> the |
---|
1846 | option <TT>--prefix=PATH</TT>. |
---|
1847 | |
---|
1848 | <P> |
---|
1849 | |
---|
1850 | <H2><A NAME="SECTION00923000000000000000"> |
---|
1851 | Optional Features</A> |
---|
1852 | </H2> |
---|
1853 | |
---|
1854 | <P> |
---|
1855 | <DL> |
---|
1856 | <DT><STRONG><TT>--enable-compinst=COMPINSTLIST</TT></STRONG></DT> |
---|
1857 | <DD> |
---|
1858 | <BR> |
---|
1859 | enable support for compiler instrumentation, |
---|
1860 | <BR> |
---|
1861 | e.g. (<TT>gnu,intel,pgi,phat,xl,ftrace</TT>), |
---|
1862 | <BR> |
---|
1863 | A VampirTrace installation can handle different compilers. |
---|
1864 | <BR> |
---|
1865 | The first item in the list is the run-time default. |
---|
1866 | <BR> |
---|
1867 | default: determined automatically by configure |
---|
1868 | |
---|
1869 | <P> |
---|
1870 | </DD> |
---|
1871 | <DT><STRONG><TT>--enable-mpi</TT></STRONG></DT> |
---|
1872 | <DD> |
---|
1873 | <BR> |
---|
1874 | enable MPI support, default: enable if |
---|
1875 | Open MPI found by configure |
---|
1876 | |
---|
1877 | <P> |
---|
1878 | </DD> |
---|
1879 | <DT><STRONG><TT>--enable-omp</TT></STRONG></DT> |
---|
1880 | <DD> |
---|
1881 | <BR> |
---|
1882 | enable OpenMP support, default: enable if |
---|
1883 | compiler supports OpenMP |
---|
1884 | |
---|
1885 | <P> |
---|
1886 | </DD> |
---|
1887 | <DT><STRONG><TT>--enable-hyb</TT></STRONG></DT> |
---|
1888 | <DD> |
---|
1889 | <BR> |
---|
1890 | enable Hybrid (MPI/OpenMP) support, default: |
---|
1891 | enable if MPI found and compiler supports OpenMP |
---|
1892 | |
---|
1893 | <P> |
---|
1894 | </DD> |
---|
1895 | <DT><STRONG><TT>--enable-memtrace</TT></STRONG></DT> |
---|
1896 | <DD> |
---|
1897 | <BR> |
---|
1898 | enable memory tracing support, default: enable if |
---|
1899 | found by configure |
---|
1900 | |
---|
1901 | <P> |
---|
1902 | </DD> |
---|
1903 | <DT><STRONG><TT>--enable-iotrace</TT></STRONG></DT> |
---|
1904 | <DD> |
---|
1905 | <BR> |
---|
1906 | enable libc's I/O tracing support, default: enable |
---|
1907 | if libdl found by configure |
---|
1908 | |
---|
1909 | <P> |
---|
1910 | </DD> |
---|
1911 | <DT><STRONG><TT>--enable-dyninst</TT></STRONG></DT> |
---|
1912 | <DD> |
---|
1913 | <BR> |
---|
1914 | enable support for Dyninst instrumentation, |
---|
1915 | <BR> |
---|
1916 | default: enable if found by configure |
---|
1917 | <BR> <SPAN CLASS="textbf">Note:</SPAN> Requires Dyninst version 5.0.1 or higher! |
---|
1918 | <BR> (<TT><A NAME="tex2html9" |
---|
1919 | HREF="http://www.dyninst.org">http://www.dyninst.org</A></TT>) |
---|
1920 | |
---|
1921 | <P> |
---|
1922 | </DD> |
---|
1923 | <DT><STRONG><TT>--enable-dyninst-attlib</TT></STRONG></DT> |
---|
1924 | <DD> |
---|
1925 | <BR> |
---|
1926 | build shared library which attaches dyninst to |
---|
1927 | the running application, |
---|
1928 | <BR> |
---|
1929 | default: enable if dyninst found |
---|
1930 | by configure and system supports shared libraries |
---|
1931 | |
---|
1932 | <P> |
---|
1933 | </DD> |
---|
1934 | <DT><STRONG><TT>--enable-papi</TT></STRONG></DT> |
---|
1935 | <DD> |
---|
1936 | <BR> |
---|
1937 | enable PAPI hardware counter support, |
---|
1938 | <BR> |
---|
1939 | default: enable if found by configure |
---|
1940 | </DD> |
---|
1941 | </DL> |
---|
1942 | |
---|
1943 | <P> |
---|
1944 | |
---|
1945 | <H2><A NAME="SECTION00924000000000000000"> |
---|
1946 | Important Optional Packages</A> |
---|
1947 | </H2> |
---|
1948 | |
---|
1949 | <P> |
---|
1950 | <DL> |
---|
1951 | <DT><STRONG><TT>--with-local-tmp-dir=LTMPDIR</TT></STRONG></DT> |
---|
1952 | <DD> |
---|
1953 | <BR> |
---|
1954 | give the path for node-local temporary directory |
---|
1955 | to store local traces to, default: <TT>/tmp/</TT> |
---|
1956 | </DD> |
---|
1957 | </DL> |
---|
1958 | |
---|
1959 | <P> |
---|
1960 | If you would like to use an external version of the OTF library, set: |
---|
1961 | <DL> |
---|
1962 | <DT><STRONG><TT>--with-extern-otf</TT></STRONG></DT> |
---|
1963 | <DD> |
---|
1964 | <BR> |
---|
1965 | use external OTF library, default: not set |
---|
1966 | </DD> |
---|
1967 | <DT><STRONG><TT>--with-extern-otf-dir=OTFDIR</TT></STRONG></DT> |
---|
1968 | <DD> |
---|
1969 | <BR> |
---|
1970 | give the path for OTF, default: <TT>/usr/local/</TT> |
---|
1971 | |
---|
1972 | <P> |
---|
1973 | </DD> |
---|
1974 | <DT><STRONG><TT>--with-otf-flags=FLAGS</TT></STRONG></DT> |
---|
1975 | <DD> |
---|
1976 | <BR> |
---|
1977 | pass FLAGS to the OTF distribution configuration |
---|
1978 | (only for internal OTF version) |
---|
1979 | |
---|
1980 | <P> |
---|
1981 | </DD> |
---|
1982 | <DT><STRONG><TT>--with-otf-lib=OTFLIB</TT></STRONG></DT> |
---|
1983 | <DD> |
---|
1984 | <BR> |
---|
1985 | use given otf lib, default: <TT>-lotf -lz</TT> |
---|
1986 | |
---|
1987 | <P> |
---|
1988 | </DD> |
---|
1989 | </DL> |
---|
1990 | |
---|
1991 | <P> |
---|
1992 | If the OTF library used was built without zlib support, then OTFLIB will |
---|
1993 | be set to <TT>-lotf</TT>. |
---|
1994 | |
---|
1995 | <P> |
---|
1996 | <DL> |
---|
1997 | <DT><STRONG><TT>--with-dyninst-dir=DYNIDIR</TT></STRONG></DT> |
---|
1998 | <DD> |
---|
1999 | <BR> |
---|
2000 | give the path for DYNINST, default: <TT>/usr/local/</TT> |
---|
2001 | |
---|
2002 | <P> |
---|
2003 | </DD> |
---|
2004 | <DT><STRONG><TT>--with-papi-dir=PAPIDIR</TT></STRONG></DT> |
---|
2005 | <DD> |
---|
2006 | <BR> |
---|
2007 | give the path for PAPI, default: <TT>/usr/</TT> |
---|
2008 | </DD> |
---|
2009 | </DL> |
---|
2010 | |
---|
2011 | <P> |
---|
2012 | If you have not specified the environment variable <TT>MPICC</TT> |
---|
2013 | (MPI compiler command), use the following options to set the location |
---|
2014 | of your MPI installation: |
---|
2015 | |
---|
2016 | <P> |
---|
2017 | <DL> |
---|
2018 | <DT><STRONG><TT>--with-mpi-dir=MPIDIR</TT></STRONG></DT> |
---|
2019 | <DD> |
---|
2020 | <BR> |
---|
2021 | give the path for MPI, default: <TT>/usr/</TT> |
---|
2022 | |
---|
2023 | <P> |
---|
2024 | </DD> |
---|
2025 | <DT><STRONG><TT>--with-mpi-inc-dir=MPIINCDIR</TT></STRONG></DT> |
---|
2026 | <DD> |
---|
2027 | <BR> |
---|
2028 | give the path for MPI include files, |
---|
2029 | <BR> |
---|
2030 | default: <TT>$MPIDIR/include/</TT> |
---|
2031 | |
---|
2032 | <P> |
---|
2033 | </DD> |
---|
2034 | <DT><STRONG><TT>--with-mpi-lib-dir=MPILIBDIR</TT></STRONG></DT> |
---|
2035 | <DD> |
---|
2036 | <BR> |
---|
2037 | give the path for MPI-libraries, default: <TT>$MPIDIR/lib/</TT> |
---|
2038 | |
---|
2039 | <P> |
---|
2040 | </DD> |
---|
2041 | <DT><STRONG><TT>--with-mpi-lib=MPILIB</TT></STRONG></DT> |
---|
2042 | <DD> |
---|
2043 | <BR> |
---|
2044 | use given mpi lib |
---|
2045 | |
---|
2046 | <P> |
---|
2047 | </DD> |
---|
2048 | <DT><STRONG><TT>--with-pmpi-lib=PMPILIB</TT></STRONG></DT> |
---|
2049 | <DD> |
---|
2050 | <BR> |
---|
2051 | use given pmpi lib, default: MPILIB |
---|
2052 | </DD> |
---|
2053 | </DL> |
---|
2054 | |
---|
2055 | <P> |
---|
2056 | |
---|
2057 | <H1><A NAME="SECTION00930000000000000000"> |
---|
2058 | Cross Compilation</A> |
---|
2059 | </H1> |
---|
2060 | |
---|
2061 | <P> |
---|
2062 | Building VampirTrace on cross compilation platforms needs some special attention. |
---|
2063 | The compiler wrappers and OPARI are built for the front-end (build system) whereas |
---|
2064 | the VampirTrace libraries, vtdyn, vtunify, and vtfilter are built for the back-end |
---|
2065 | (host system). Some <TT>configure</TT> options which are of interest for cross compilation |
---|
2066 | are shown below: |
---|
2067 | |
---|
2068 | <UL> |
---|
2069 | <LI>Set <TT>CC</TT>, <TT>CXX</TT>, <TT>F77</TT>, and <TT>FC</TT> to the cross compilers installed on the front-end. |
---|
2070 | </LI> |
---|
2071 | <LI>Set <TT>CXX_FOR_BUILD</TT> to the native compiler of the front-end (used to compile compiler wrappers and OPARI only). |
---|
2072 | </LI> |
---|
2073 | <LI>Set <TT>--host=</TT> to the output of <TT>config.guess</TT> on the back-end. |
---|
2074 | </LI> |
---|
2075 | <LI>You may also need to set additional commands and flags for the back-end (e.g. <TT>RANLIB</TT>, <TT>AR</TT>, <TT>MPICC</TT>, <TT>CXXFLAGS</TT>). |
---|
2076 | </LI> |
---|
2077 | </UL> |
---|
2078 | For example, this <TT>configure</TT> command line works for an NEC SX6 system with an X86_64 based front-end: |
---|
2079 | |
---|
2080 | <P> |
---|
2081 | <PRE> |
---|
2082 | % ./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 |
---|
2083 | AR=sxar RANLIB="sxar st" CXX_FOR_BUILD=c++ |
---|
2084 | --host=sx6-nec-superux14.1 |
---|
2085 | --with-otf-lib=-lotf |
---|
2086 | </PRE> |
---|
2087 | |
---|
2088 | <P> |
---|
2089 | |
---|
2090 | <H1><A NAME="SECTION00940000000000000000"> |
---|
2091 | Environment Set-Up</A> |
---|
2092 | </H1> |
---|
2093 | |
---|
2094 | <P> |
---|
2095 | Add the <TT>bin</TT> subdirectory of the installation directory to your |
---|
2096 | <TT>$PATH</TT> environment variable. To use VampirTrace with Dyninst, |
---|
2097 | you will also need to add the lib subdirectory to your |
---|
2098 | <TT>LD_LIBRARY_PATH</TT> environment variable: |
---|
2099 | <BR> |
---|
2100 | <BR> |
---|
2101 | <BR> |
---|
2102 | for csh and tcsh: |
---|
2103 | <PRE> |
---|
2104 | > setenv PATH <vt-install>/bin:$PATH |
---|
2105 | > setenv LD_LIBRARY_PATH <vt-install>/lib:$LD_LIBRARY_PATH |
---|
2106 | </PRE> |
---|
2107 | for bash and sh: |
---|
2108 | <PRE> |
---|
2109 | % export PATH=<vt-install>/bin:$PATH |
---|
2110 | % export LD_LIBRARY_PATH=<vt-install>/lib:$LD_LIBRARY_PATH |
---|
2111 | </PRE> |
---|
2112 | |
---|
2113 | <P> |
---|
2114 | |
---|
2115 | <H1><A NAME="SECTION00950000000000000000"> |
---|
2116 | Notes for Developers</A> |
---|
2117 | </H1> |
---|
2118 | |
---|
2119 | <P> |
---|
2120 | |
---|
2121 | <H2><A NAME="SECTION00951000000000000000"> |
---|
2122 | Build from CVS</A> |
---|
2123 | </H2> |
---|
2124 | |
---|
2125 | <P> |
---|
2126 | If you have checked out a <SPAN CLASS="textit">developer's copy</SPAN> of VampirTrace (i.e. |
---|
2127 | checked out from CVS), you should first run: |
---|
2128 | |
---|
2129 | <P> |
---|
2130 | <PRE> |
---|
2131 | % ./bootstrap |
---|
2132 | </PRE> |
---|
2133 | Note that GNU Autoconf ≥2.60 and GNU Automake ≥1.9.6 are required. |
---|
2134 | You can download them from <TT><A NAME="tex2html10" |
---|
2135 | HREF="http://www.gnu.org/software/autoconf">http://www.gnu.org/software/autoconf</A></TT> |
---|
2136 | and <TT><A NAME="tex2html11" |
---|
2137 | HREF="http://www.gnu.org/software/automake">http://www.gnu.org/software/automake</A></TT>. |
---|
2138 | |
---|
2139 | <P> |
---|
2140 | |
---|
2141 | <H2><A NAME="SECTION00952000000000000000"> |
---|
2142 | Creating a distribution tarball (VampirTrace-X.X.X.tar.gz)</A> |
---|
2143 | </H2> |
---|
2144 | |
---|
2145 | <P> |
---|
2146 | If you would like to create a new distribution tarball, run: |
---|
2147 | |
---|
2148 | <P> |
---|
2149 | <PRE> |
---|
2150 | % ./makedist -o <otftarball> <major> <minor> <release> |
---|
2151 | </PRE> |
---|
2152 | instead of <TT>make dist</TT>. |
---|
2153 | The script <TT>makedist</TT> adapts the version number <TT><major>.<minor>.<release></TT> in |
---|
2154 | <TT>configure.in</TT> and extracts the given OTF tarball <TT><otftarball></TT> in |
---|
2155 | <TT>./extlib/otf/</TT>. |
---|
2156 | |
---|
2157 | <P> |
---|
2158 | <BR><HR> |
---|
2159 | |
---|
2160 | </BODY> |
---|
2161 | </HTML> |
---|