-- Counts event end reasons (failure causes) per label produced by the
-- RealLabels UDF, which is given each event's duration and end reason.
-- Per the original author's note, events are categorised by duration
-- (short, medium, long).
-- author: cristina
--
-- Parameters (Pig parameter substitution):
--   $inputDir  - directory containing the tab-separated event_trace.tab
--   $outputDir - directory the filtered per-label counts are written to

-- NOTE(review): RealLabels is presumably provided by this jar -- confirm.
REGISTER failurecausefunc.jar;

-- Load the raw event trace. Only event_end_reason carries a declared type;
-- all other fields are left untyped, exactly as in the original schema.
raw = LOAD '$inputDir/event_trace.tab' USING PigStorage('\t') AS (
    event_id,
    component_id,
    node_id,
    platform_id,
    node_name,
    event_type,
    event_start_time,
    event_stop_time,
    event_end_reason:chararray
);

-- Build durations: stop time minus start time, paired with the end reason.
-- NOTE(review): the time unit is not visible here -- confirm against the trace format.
duration = FOREACH raw GENERATE
    event_stop_time - event_start_time AS dur,
    event_end_reason AS fault;

-- Label every (duration, fault) pair and attach a constant 1 so the labels
-- can be counted by summing.
mapping = FOREACH duration GENERATE
    FLATTEN(RealLabels(dur, fault)) AS key,
    1 AS value;

-- NOTE(review): rows are not filtered on a null key. If such a filter is ever
-- needed, write it as `FILTER mapping BY key IS NOT NULL;` -- a comparison
-- against NULL with `!=` is not a valid null test in Pig.

-- Group by label and count the occurrences of each label.
grup = GROUP mapping BY (chararray)key;
rez = FOREACH grup GENERATE group, SUM(mapping.value) AS sum;

-- Keep only the labels that occur more than 1000 times.
filteredRez = FILTER rez BY sum > 1000;

STORE filteredRez INTO '$outputDir' USING PigStorage();