-- This script computes the number of faults per geographical location.
-- author: cristina
--
-- Parameters (substituted by Pig at launch time):
--   $inputDir1  directory containing event_trace.tab (tab-separated)
--   $inputDir2  directory containing node.tab (tab-separated)
--   $outputDir  directory the per-location totals are written to
--
-- Output: one tab-separated record per location: (location, fault count).

-- Concat is not qualified below; presumably it is the UDF shipped in this
-- jar -- confirm.
REGISTER locationconcat.jar;

-- Event trace: every row is counted as one fault further down.
raw_event = LOAD '$inputDir1/event_trace.tab' USING PigStorage('\t') AS (
    event_id,
    component_id,
    node_id:int,
    platform_id:int,
    node_name,
    event_type,
    event_start_time,
    event_stop_time,
    event_end_reason:chararray
);

-- Node catalogue: supplies node_location for each (node_id, platform_id).
raw_node = LOAD '$inputDir2/node.tab' USING PigStorage('\t') AS (
    node_id:int,
    platform_id:int,
    node_name,
    node_ip,
    node_location,
    timezone,
    proc_model,
    os_name,
    cores_per_proc,
    num_procs,
    mem_size,
    disk_size,
    up_bw,
    down_bw,
    metric_id,
    notes
);

-- Build the join key on both sides and attach a counter of 1 to every event.
-- NOTE(review): Concat presumably merges node_id and platform_id into one
-- composite key; its exact output format is not visible here -- confirm.
event = FOREACH raw_event GENERATE
    FLATTEN(Concat(node_id, platform_id)) AS id,
    1 AS value;

node = FOREACH raw_node GENERATE
    FLATTEN(Concat(node_id, platform_id)) AS id,
    node_location AS location;

-- Disabled alternative from the original script: key on node_id alone.
--   event = FOREACH raw_event GENERATE node_id as id, 1 as value;
--   node = FOREACH raw_node GENERATE node_id as id, node_location as location;

-- Inner join: events whose key has no matching node row are dropped, and a
-- key that appears several times in node.tab multiplies its events.
joined = JOIN event BY (chararray)id, node BY (chararray)id;

-- Keep only the columns needed for the aggregation (a projection, not a filter).
location_faults = FOREACH joined GENERATE location, value;

-- node_location is loaded without a declared type, hence the explicit cast.
by_location = GROUP location_faults BY (chararray)location;

-- Summing the per-event 1s yields the number of joined events per location.
faults_per_location = FOREACH by_location GENERATE
    group AS location,
    SUM(location_faults.value) AS fault_count;

-- Tab is PigStorage's default delimiter; spelled out to match the loaders.
STORE faults_per_location INTO '$outputDir' USING PigStorage('\t');