source: proiecte/HadoopJUnit/hadoop-0.20.1/src/mapred/org/apache/hadoop/mapred/MergeSorter.java @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUNit Project

  • Property svn:executable set to *
File size: 3.5 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements.  See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership.  The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License.  You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18package org.apache.hadoop.mapred;
19
20import java.util.Comparator;
21import org.apache.hadoop.io.IntWritable;
22import org.apache.hadoop.util.MergeSort;
23import org.apache.hadoop.io.SequenceFile.Sorter.RawKeyValueIterator;
24
25/** This class implements the sort method from BasicTypeSorterBase class as
26 * MergeSort. Note that this class is really a wrapper over the actual
27 * mergesort implementation that is there in the util package. The main intent
28 * of providing this class is to setup the input data for the util.MergeSort
29 * algo so that the latter doesn't need to bother about the various data
30 * structures that have been created for the Map output but rather concentrate
31 * on the core algorithm (thereby allowing easy integration of a mergesort
32 * implementation). The bridge between this class and the util.MergeSort class
33 * is the Comparator.
34 */
35class MergeSorter extends BasicTypeSorterBase
36implements Comparator<IntWritable> {
37  private static int progressUpdateFrequency = 10000;
38  private int progressCalls = 0;
39 
40  /** The sort method derived from BasicTypeSorterBase and overridden here*/
41  public RawKeyValueIterator sort() {
42    MergeSort m = new MergeSort(this);
43    int count = super.count;
44    if (count == 0) return null;
45    int [] pointers = super.pointers;
46    int [] pointersCopy = new int[count];
47    System.arraycopy(pointers, 0, pointersCopy, 0, count);
48    m.mergeSort(pointers, pointersCopy, 0, count);
49    return new MRSortResultIterator(super.keyValBuffer, pointersCopy, 
50                                    super.startOffsets, super.keyLengths, super.valueLengths);
51  }
52  /** The implementation of the compare method from Comparator. Note that
53   * Comparator.compare takes objects as inputs and so the int values are
54   * wrapped in (reusable) IntWritables from the class util.MergeSort
55   * @param i
56   * @param j
57   * @return int as per the specification of Comparator.compare
58   */
59  public int compare (IntWritable i, IntWritable j) {
60    // indicate we're making progress but do a batch update
61    if (progressCalls < progressUpdateFrequency) {
62      progressCalls++;
63    } else {
64      progressCalls = 0;
65      reporter.progress();
66    } 
67    return comparator.compare(keyValBuffer.getData(), startOffsets[i.get()],
68                              keyLengths[i.get()],
69                              keyValBuffer.getData(), startOffsets[j.get()], 
70                              keyLengths[j.get()]);
71  }
72 
73  /** Add the extra memory that will be utilized by the sort method */
74  public long getMemoryUtilized() {
75    //this is memory that will be actually utilized (considering the temp
76    //array that will be allocated by the sort() method (mergesort))
77    return super.getMemoryUtilized() + super.count * 4; 
78  }
79
80}
Note: See TracBrowser for help on using the repository browser.