-- Counts the number of fault events per geographical node location.
-- author: cristina
--
-- Parameters:
--   inputDir  directory containing event_trace.tab (required)
--   nodeFile  path to the tab-separated node table (optional; the default
--             below is the location this script originally hard-coded)
--
-- Output: one (location, fault_count) row per location, stored in
-- 'nodeslocation.rez' with the default PigStorage delimiter.

REGISTER locationconcat.jar;

%default nodeFile '/home/cristina/PPproj/repo/tests/node.tab'

-- One row per event; only node_id and platform_id are used below.
raw_event = LOAD '$inputDir/event_trace.tab' USING PigStorage('\t') AS (
    event_id,
    component_id,
    node_id:int,
    platform_id:int,
    node_name,
    event_type,
    event_start_time,
    event_stop_time,
    event_end_reason:chararray
);

-- One row per node; only node_id, platform_id and node_location are used below.
raw_node = LOAD '$nodeFile' USING PigStorage('\t') AS (
    node_id:int,
    platform_id:int,
    node_name,
    node_ip,
    node_location,
    timezone,
    proc_model,
    os_name,
    cores_per_proc,
    num_procs,
    mem_size,
    disk_size,
    up_bw,
    down_bw,
    metric_id,
    notes
);

-- Build the join key from (node_id, platform_id) and tag every event with a
-- counter of 1 so that the events can be summed per location later.
-- NOTE(review): Concat is a UDF from locationconcat.jar whose implementation is
-- not visible here. If it concatenates without a separator, distinct pairs such
-- as (1, 23) and (12, 3) would produce the same key -- confirm.
-- NOTE(review): an earlier variant keyed on node_id alone; presumably node_id is
-- not unique across platforms -- confirm before simplifying the key.
event = FOREACH raw_event GENERATE Concat(node_id, platform_id) AS id, 1 AS value;
node = FOREACH raw_node GENERATE Concat(node_id, platform_id) AS id, node_location AS location;

-- Inner join: events with no matching node row are dropped.
joined = JOIN event BY id, node BY id;

-- Keep only the location and the per-event counter, disambiguated explicitly.
filtered = FOREACH joined GENERATE node::location AS location, event::value AS value;

grouped = GROUP filtered BY location;

-- Summing the constant 1 per event yields the number of events per location.
rez = FOREACH grouped GENERATE group AS location, SUM(filtered.value) AS fault_count;

STORE rez INTO 'nodeslocation.rez' USING PigStorage();