1 | /* |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one |
---|
3 | * or more contributor license agreements. See the NOTICE file |
---|
4 | * distributed with this work for additional information |
---|
5 | * regarding copyright ownership. The ASF licenses this file |
---|
6 | * to you under the Apache License, Version 2.0 (the |
---|
7 | * "License"); you may not use this file except in compliance |
---|
8 | * with the License. You may obtain a copy of the License at |
---|
9 | * |
---|
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
11 | * |
---|
12 | * Unless required by applicable law or agreed to in writing, software |
---|
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
15 | * See the License for the specific language governing permissions and |
---|
16 | * limitations under the License. |
---|
17 | */ |
---|
18 | package org.apache.hadoop.mapred; |
---|
19 | |
---|
20 | import java.util.Comparator; |
---|
21 | import org.apache.hadoop.io.IntWritable; |
---|
22 | import org.apache.hadoop.util.MergeSort; |
---|
23 | import org.apache.hadoop.io.SequenceFile.Sorter.RawKeyValueIterator; |
---|
24 | |
---|
25 | /** This class implements the sort method from BasicTypeSorterBase class as |
---|
26 | * MergeSort. Note that this class is really a wrapper over the actual |
---|
27 | * mergesort implementation that is there in the util package. The main intent |
---|
28 | * of providing this class is to setup the input data for the util.MergeSort |
---|
29 | * algo so that the latter doesn't need to bother about the various data |
---|
30 | * structures that have been created for the Map output but rather concentrate |
---|
31 | * on the core algorithm (thereby allowing easy integration of a mergesort |
---|
32 | * implementation). The bridge between this class and the util.MergeSort class |
---|
33 | * is the Comparator. |
---|
34 | */ |
---|
35 | class MergeSorter extends BasicTypeSorterBase |
---|
36 | implements Comparator<IntWritable> { |
---|
37 | private static int progressUpdateFrequency = 10000; |
---|
38 | private int progressCalls = 0; |
---|
39 | |
---|
40 | /** The sort method derived from BasicTypeSorterBase and overridden here*/ |
---|
41 | public RawKeyValueIterator sort() { |
---|
42 | MergeSort m = new MergeSort(this); |
---|
43 | int count = super.count; |
---|
44 | if (count == 0) return null; |
---|
45 | int [] pointers = super.pointers; |
---|
46 | int [] pointersCopy = new int[count]; |
---|
47 | System.arraycopy(pointers, 0, pointersCopy, 0, count); |
---|
48 | m.mergeSort(pointers, pointersCopy, 0, count); |
---|
49 | return new MRSortResultIterator(super.keyValBuffer, pointersCopy, |
---|
50 | super.startOffsets, super.keyLengths, super.valueLengths); |
---|
51 | } |
---|
52 | /** The implementation of the compare method from Comparator. Note that |
---|
53 | * Comparator.compare takes objects as inputs and so the int values are |
---|
54 | * wrapped in (reusable) IntWritables from the class util.MergeSort |
---|
55 | * @param i |
---|
56 | * @param j |
---|
57 | * @return int as per the specification of Comparator.compare |
---|
58 | */ |
---|
59 | public int compare (IntWritable i, IntWritable j) { |
---|
60 | // indicate we're making progress but do a batch update |
---|
61 | if (progressCalls < progressUpdateFrequency) { |
---|
62 | progressCalls++; |
---|
63 | } else { |
---|
64 | progressCalls = 0; |
---|
65 | reporter.progress(); |
---|
66 | } |
---|
67 | return comparator.compare(keyValBuffer.getData(), startOffsets[i.get()], |
---|
68 | keyLengths[i.get()], |
---|
69 | keyValBuffer.getData(), startOffsets[j.get()], |
---|
70 | keyLengths[j.get()]); |
---|
71 | } |
---|
72 | |
---|
73 | /** Add the extra memory that will be utilized by the sort method */ |
---|
74 | public long getMemoryUtilized() { |
---|
75 | //this is memory that will be actually utilized (considering the temp |
---|
76 | //array that will be allocated by the sort() method (mergesort)) |
---|
77 | return super.getMemoryUtilized() + super.count * 4; |
---|
78 | } |
---|
79 | |
---|
80 | } |
---|