source: proiecte/PDAD/trunk/failurecause/pig/mr/failurecause.pig @ 154

Last change on this file since 154 was 154, checked in by (none), 14 years ago

PDAD project

File size: 840 bytes
Line 
-- This script processes failure causes for different categories of events,
-- where events are grouped by duration (short, medium, long). It counts how
-- many events fall into each category and stores the frequent categories.
--
-- author: cristina
--
-- Parameters (supply with -param name=value):
--   inputDir  : directory that contains the tab-separated file event_trace.tab
--   outputDir : location the result relation is stored into
--   minCount  : optional; only categories occurring more than this many times
--               are stored. Defaults to 1000, the previously hard-coded cutoff.

%default minCount 1000;

-- RealLabels is not defined in this script; it is expected to come from this jar.
REGISTER failurecausefunc.jar;

-- Only event_end_reason carries a declared type; every other field is left
-- untyped and Pig decides how to interpret it when it is used.
raw = LOAD '$inputDir/event_trace.tab' USING PigStorage('\t') AS (event_id,component_id,node_id,platform_id,node_name,event_type,event_start_time,event_stop_time,event_end_reason:chararray);

-- build durations: stop time minus start time, paired with the end reason
-- NOTE(review): the time unit is whatever event_trace.tab uses; confirm.
duration = FOREACH raw GENERATE event_stop_time-event_start_time as dur, event_end_reason as fault;

-- Label every event via the RealLabels UDF and attach a constant 1 so that
-- summing the values per key yields the number of events for that key.
-- NOTE(review): presumably RealLabels turns (duration, end reason) into a
-- category label; its implementation is not visible here, verify in the jar.
mapping = FOREACH duration GENERATE flatten(RealLabels(dur, fault)) as key, 1 as value;

-- NOTE(review): an earlier draft sketched a filter on null keys written as
-- "key != NULL". Pig null tests must be written "key IS NOT NULL". Whether
-- RealLabels can produce null keys is unknown from this file, so no filter is
-- applied here; confirm before adding one.

-- group by label and count the events per label
grup = GROUP mapping BY (chararray)key;
rez = FOREACH grup GENERATE group, SUM(mapping.value) as sum;

-- keep only the labels seen more often than the configured cutoff
filteredRez = FILTER rez BY sum > $minCount;

STORE filteredRez INTO '$outputDir' USING PigStorage();
Note: See TracBrowser for help on using the repository browser.