-- Counts the number of faults (trace events) per geographical node location.
--
-- Parameters (supplied at launch time):
--   inputDir1  directory holding event_trace.tab
--   inputDir2  directory holding node.tab
--   outputDir  destination of the (location, count) rows
--
-- Every row of event_trace.tab contributes to the count; this script does not
-- filter on event_type or any other column.
--
-- author: cristina

-- Concat (used below) is not a Pig built-in; presumably it is the UDF shipped
-- in this jar -- confirm.
REGISTER locationconcat.jar;

-- Tab-separated event trace.
trace_rows = LOAD '$inputDir1/event_trace.tab' USING PigStorage('\t') AS (
    event_id,
    component_id,
    node_id:int,
    platform_id:int,
    node_name,
    event_type,
    event_start_time,
    event_stop_time,
    event_end_reason:chararray
);

-- Tab-separated node catalogue; node_location is the only descriptive column
-- used further down.
node_rows = LOAD '$inputDir2/node.tab' USING PigStorage('\t') AS (
    node_id:int,
    platform_id:int,
    node_name,
    node_ip,
    node_location,
    timezone,
    proc_model,
    os_name,
    cores_per_proc,
    num_procs,
    mem_size,
    disk_size,
    up_bw,
    down_bw,
    metric_id,
    notes
);

-- Reduce both inputs to a join key built from (node_id, platform_id).
-- NOTE(review): the key combines both ids, presumably because node_id alone is
-- not unique across platforms -- confirm.
-- Each event carries a constant 1 so that it can be summed per location later.
event_hits = FOREACH trace_rows GENERATE
    FLATTEN(Concat(node_id, platform_id)) AS node_key,
    1 AS hit;

node_places = FOREACH node_rows GENERATE
    FLATTEN(Concat(node_id, platform_id)) AS node_key,
    node_location AS location;

-- Inner join: events without a matching node row are dropped.
hits_with_place = JOIN event_hits BY (chararray)node_key,
                       node_places BY (chararray)node_key;

-- Keep only the location and the per-event counter.
located_hits = FOREACH hits_with_place GENERATE
    node_places::location AS location,
    event_hits::hit AS hit;

-- One output row per location: the location and the total of its counters.
hits_by_place = GROUP located_hits BY (chararray)location;

place_totals = FOREACH hits_by_place GENERATE
    group,
    SUM(located_hits.hit);

-- Written with PigStorage's default field delimiter.
STORE place_totals INTO '$outputDir' USING PigStorage();