1 | /** |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one |
---|
3 | * or more contributor license agreements. See the NOTICE file |
---|
4 | * distributed with this work for additional information |
---|
5 | * regarding copyright ownership. The ASF licenses this file |
---|
6 | * to you under the Apache License, Version 2.0 (the |
---|
7 | * "License"); you may not use this file except in compliance |
---|
8 | * with the License. You may obtain a copy of the License at |
---|
9 | * |
---|
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
11 | * |
---|
12 | * Unless required by applicable law or agreed to in writing, software |
---|
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
15 | * See the License for the specific language governing permissions and |
---|
16 | * limitations under the License. |
---|
17 | */ |
---|
18 | package org.apache.hadoop.util; |
---|
19 | |
---|
20 | import java.io.IOException; |
---|
21 | import java.util.Arrays; |
---|
22 | import java.util.Random; |
---|
23 | |
---|
24 | import junit.framework.TestCase; |
---|
25 | |
---|
26 | import org.apache.hadoop.conf.Configuration; |
---|
27 | import org.apache.hadoop.io.DataInputBuffer; |
---|
28 | import org.apache.hadoop.io.DataOutputBuffer; |
---|
29 | import org.apache.hadoop.io.Text; |
---|
30 | import org.apache.hadoop.io.WritableComparator; |
---|
31 | |
---|
32 | public class TestIndexedSort extends TestCase { |
---|
33 | |
---|
34 | public void sortAllEqual(IndexedSorter sorter) throws Exception { |
---|
35 | final int SAMPLE = 500; |
---|
36 | int[] values = new int[SAMPLE]; |
---|
37 | Arrays.fill(values, 10); |
---|
38 | SampleSortable s = new SampleSortable(values); |
---|
39 | sorter.sort(s, 0, SAMPLE); |
---|
40 | int[] check = s.getSorted(); |
---|
41 | assertTrue(Arrays.toString(values) + "\ndoesn't match\n" + |
---|
42 | Arrays.toString(check), Arrays.equals(values, check)); |
---|
43 | // Set random min/max, re-sort. |
---|
44 | Random r = new Random(); |
---|
45 | int min = r.nextInt(SAMPLE); |
---|
46 | int max = (min + 1 + r.nextInt(SAMPLE - 2)) % SAMPLE; |
---|
47 | values[min] = 9; |
---|
48 | values[max] = 11; |
---|
49 | System.out.println("testAllEqual setting min/max at " + min + "/" + max + |
---|
50 | "(" + sorter.getClass().getName() + ")"); |
---|
51 | s = new SampleSortable(values); |
---|
52 | sorter.sort(s, 0, SAMPLE); |
---|
53 | check = s.getSorted(); |
---|
54 | Arrays.sort(values); |
---|
55 | assertTrue(check[0] == 9); |
---|
56 | assertTrue(check[SAMPLE - 1] == 11); |
---|
57 | assertTrue(Arrays.toString(values) + "\ndoesn't match\n" + |
---|
58 | Arrays.toString(check), Arrays.equals(values, check)); |
---|
59 | } |
---|
60 | |
---|
61 | public void sortSorted(IndexedSorter sorter) throws Exception { |
---|
62 | final int SAMPLE = 500; |
---|
63 | int[] values = new int[SAMPLE]; |
---|
64 | Random r = new Random(); |
---|
65 | long seed = r.nextLong(); |
---|
66 | r.setSeed(seed); |
---|
67 | System.out.println("testSorted seed: " + seed + |
---|
68 | "(" + sorter.getClass().getName() + ")"); |
---|
69 | for (int i = 0; i < SAMPLE; ++i) { |
---|
70 | values[i] = r.nextInt(100); |
---|
71 | } |
---|
72 | Arrays.sort(values); |
---|
73 | SampleSortable s = new SampleSortable(values); |
---|
74 | sorter.sort(s, 0, SAMPLE); |
---|
75 | int[] check = s.getSorted(); |
---|
76 | assertTrue(Arrays.toString(values) + "\ndoesn't match\n" + |
---|
77 | Arrays.toString(check), Arrays.equals(values, check)); |
---|
78 | } |
---|
79 | |
---|
80 | public void sortSequential(IndexedSorter sorter) throws Exception { |
---|
81 | final int SAMPLE = 500; |
---|
82 | int[] values = new int[SAMPLE]; |
---|
83 | for (int i = 0; i < SAMPLE; ++i) { |
---|
84 | values[i] = i; |
---|
85 | } |
---|
86 | SampleSortable s = new SampleSortable(values); |
---|
87 | sorter.sort(s, 0, SAMPLE); |
---|
88 | int[] check = s.getSorted(); |
---|
89 | assertTrue(Arrays.toString(values) + "\ndoesn't match\n" + |
---|
90 | Arrays.toString(check), Arrays.equals(values, check)); |
---|
91 | } |
---|
92 | |
---|
93 | public void sortSingleRecord(IndexedSorter sorter) throws Exception { |
---|
94 | final int SAMPLE = 1; |
---|
95 | SampleSortable s = new SampleSortable(SAMPLE); |
---|
96 | int[] values = s.getValues(); |
---|
97 | sorter.sort(s, 0, SAMPLE); |
---|
98 | int[] check = s.getSorted(); |
---|
99 | assertTrue(Arrays.toString(values) + "\ndoesn't match\n" + |
---|
100 | Arrays.toString(check), Arrays.equals(values, check)); |
---|
101 | } |
---|
102 | |
---|
103 | public void sortRandom(IndexedSorter sorter) throws Exception { |
---|
104 | final int SAMPLE = 256 * 1024; |
---|
105 | SampleSortable s = new SampleSortable(SAMPLE); |
---|
106 | long seed = s.getSeed(); |
---|
107 | System.out.println("sortRandom seed: " + seed + |
---|
108 | "(" + sorter.getClass().getName() + ")"); |
---|
109 | int[] values = s.getValues(); |
---|
110 | Arrays.sort(values); |
---|
111 | sorter.sort(s, 0, SAMPLE); |
---|
112 | int[] check = s.getSorted(); |
---|
113 | assertTrue("seed: " + seed + "\ndoesn't match\n", |
---|
114 | Arrays.equals(values, check)); |
---|
115 | } |
---|
116 | |
---|
117 | public void sortWritable(IndexedSorter sorter) throws Exception { |
---|
118 | final int SAMPLE = 1000; |
---|
119 | WritableSortable s = new WritableSortable(SAMPLE); |
---|
120 | long seed = s.getSeed(); |
---|
121 | System.out.println("sortWritable seed: " + seed + |
---|
122 | "(" + sorter.getClass().getName() + ")"); |
---|
123 | String[] values = s.getValues(); |
---|
124 | Arrays.sort(values); |
---|
125 | sorter.sort(s, 0, SAMPLE); |
---|
126 | String[] check = s.getSorted(); |
---|
127 | assertTrue("seed: " + seed + "\ndoesn't match", |
---|
128 | Arrays.equals(values, check)); |
---|
129 | } |
---|
130 | |
---|
131 | |
---|
132 | public void testQuickSort() throws Exception { |
---|
133 | QuickSort sorter = new QuickSort(); |
---|
134 | sortRandom(sorter); |
---|
135 | sortSingleRecord(sorter); |
---|
136 | sortSequential(sorter); |
---|
137 | sortSorted(sorter); |
---|
138 | sortAllEqual(sorter); |
---|
139 | sortWritable(sorter); |
---|
140 | |
---|
141 | // test degenerate case for median-of-three partitioning |
---|
142 | // a_n, a_1, a_2, ..., a_{n-1} |
---|
143 | final int DSAMPLE = 500; |
---|
144 | int[] values = new int[DSAMPLE]; |
---|
145 | for (int i = 0; i < DSAMPLE; ++i) { values[i] = i; } |
---|
146 | values[0] = values[DSAMPLE - 1] + 1; |
---|
147 | SampleSortable s = new SampleSortable(values); |
---|
148 | values = s.getValues(); |
---|
149 | final int DSS = (DSAMPLE / 2) * (DSAMPLE / 2); |
---|
150 | // Worst case is (N/2)^2 comparisons, not including those effecting |
---|
151 | // the median-of-three partitioning; impl should handle this case |
---|
152 | MeasuredSortable m = new MeasuredSortable(s, DSS); |
---|
153 | sorter.sort(m, 0, DSAMPLE); |
---|
154 | System.out.println("QuickSort degen cmp/swp: " + |
---|
155 | m.getCmp() + "/" + m.getSwp() + |
---|
156 | "(" + sorter.getClass().getName() + ")"); |
---|
157 | Arrays.sort(values); |
---|
158 | int[] check = s.getSorted(); |
---|
159 | assertTrue(Arrays.equals(values, check)); |
---|
160 | } |
---|
161 | |
---|
162 | public void testHeapSort() throws Exception { |
---|
163 | HeapSort sorter = new HeapSort(); |
---|
164 | sortRandom(sorter); |
---|
165 | sortSingleRecord(sorter); |
---|
166 | sortSequential(sorter); |
---|
167 | sortSorted(sorter); |
---|
168 | sortAllEqual(sorter); |
---|
169 | sortWritable(sorter); |
---|
170 | } |
---|
171 | |
---|
172 | // Sortables // |
---|
173 | |
---|
174 | private static class SampleSortable implements IndexedSortable { |
---|
175 | private int[] valindex; |
---|
176 | private int[] valindirect; |
---|
177 | private int[] values; |
---|
178 | private final long seed; |
---|
179 | |
---|
180 | public SampleSortable() { |
---|
181 | this(50); |
---|
182 | } |
---|
183 | |
---|
184 | public SampleSortable(int j) { |
---|
185 | Random r = new Random(); |
---|
186 | seed = r.nextLong(); |
---|
187 | r.setSeed(seed); |
---|
188 | values = new int[j]; |
---|
189 | valindex = new int[j]; |
---|
190 | valindirect = new int[j]; |
---|
191 | for (int i = 0; i < j; ++i) { |
---|
192 | valindex[i] = valindirect[i] = i; |
---|
193 | values[i] = r.nextInt(1000); |
---|
194 | } |
---|
195 | } |
---|
196 | |
---|
197 | public SampleSortable(int[] values) { |
---|
198 | this.values = values; |
---|
199 | valindex = new int[values.length]; |
---|
200 | valindirect = new int[values.length]; |
---|
201 | for (int i = 0; i < values.length; ++i) { |
---|
202 | valindex[i] = valindirect[i] = i; |
---|
203 | } |
---|
204 | seed = 0; |
---|
205 | } |
---|
206 | |
---|
207 | public long getSeed() { |
---|
208 | return seed; |
---|
209 | } |
---|
210 | |
---|
211 | public int compare(int i, int j) { |
---|
212 | // assume positive |
---|
213 | return |
---|
214 | values[valindirect[valindex[i]]] - values[valindirect[valindex[j]]]; |
---|
215 | } |
---|
216 | |
---|
217 | public void swap(int i, int j) { |
---|
218 | int tmp = valindex[i]; |
---|
219 | valindex[i] = valindex[j]; |
---|
220 | valindex[j] = tmp; |
---|
221 | } |
---|
222 | |
---|
223 | public int[] getSorted() { |
---|
224 | int[] ret = new int[values.length]; |
---|
225 | for (int i = 0; i < ret.length; ++i) { |
---|
226 | ret[i] = values[valindirect[valindex[i]]]; |
---|
227 | } |
---|
228 | return ret; |
---|
229 | } |
---|
230 | |
---|
231 | public int[] getValues() { |
---|
232 | int[] ret = new int[values.length]; |
---|
233 | System.arraycopy(values, 0, ret, 0, values.length); |
---|
234 | return ret; |
---|
235 | } |
---|
236 | |
---|
237 | } |
---|
238 | |
---|
239 | public static class MeasuredSortable implements IndexedSortable { |
---|
240 | |
---|
241 | private int comparisions; |
---|
242 | private int swaps; |
---|
243 | private final int maxcmp; |
---|
244 | private final int maxswp; |
---|
245 | private IndexedSortable s; |
---|
246 | |
---|
247 | public MeasuredSortable(IndexedSortable s) { |
---|
248 | this(s, Integer.MAX_VALUE); |
---|
249 | } |
---|
250 | |
---|
251 | public MeasuredSortable(IndexedSortable s, int maxcmp) { |
---|
252 | this(s, maxcmp, Integer.MAX_VALUE); |
---|
253 | } |
---|
254 | |
---|
255 | public MeasuredSortable(IndexedSortable s, int maxcmp, int maxswp) { |
---|
256 | this.s = s; |
---|
257 | this.maxcmp = maxcmp; |
---|
258 | this.maxswp = maxswp; |
---|
259 | } |
---|
260 | |
---|
261 | public int getCmp() { return comparisions; } |
---|
262 | public int getSwp() { return swaps; } |
---|
263 | |
---|
264 | public int compare(int i, int j) { |
---|
265 | assertTrue("Expected fewer than " + maxcmp + " comparisons", |
---|
266 | ++comparisions < maxcmp); |
---|
267 | return s.compare(i, j); |
---|
268 | } |
---|
269 | |
---|
270 | public void swap(int i, int j) { |
---|
271 | assertTrue("Expected fewer than " + maxswp + " swaps", |
---|
272 | ++swaps < maxswp); |
---|
273 | s.swap(i, j); |
---|
274 | } |
---|
275 | |
---|
276 | } |
---|
277 | |
---|
278 | private static class WritableSortable implements IndexedSortable { |
---|
279 | |
---|
280 | private static Random r = new Random(); |
---|
281 | private final int eob; |
---|
282 | private final int[] indices; |
---|
283 | private final int[] offsets; |
---|
284 | private final byte[] bytes; |
---|
285 | private final WritableComparator comparator; |
---|
286 | private final String[] check; |
---|
287 | private final long seed; |
---|
288 | |
---|
289 | public WritableSortable() throws IOException { |
---|
290 | this(100); |
---|
291 | } |
---|
292 | |
---|
293 | public WritableSortable(int j) throws IOException { |
---|
294 | seed = r.nextLong(); |
---|
295 | r.setSeed(seed); |
---|
296 | Text t = new Text(); |
---|
297 | StringBuffer sb = new StringBuffer(); |
---|
298 | indices = new int[j]; |
---|
299 | offsets = new int[j]; |
---|
300 | check = new String[j]; |
---|
301 | DataOutputBuffer dob = new DataOutputBuffer(); |
---|
302 | for (int i = 0; i < j; ++i) { |
---|
303 | indices[i] = i; |
---|
304 | offsets[i] = dob.getLength(); |
---|
305 | genRandom(t, r.nextInt(15) + 1, sb); |
---|
306 | t.write(dob); |
---|
307 | check[i] = t.toString(); |
---|
308 | } |
---|
309 | eob = dob.getLength(); |
---|
310 | bytes = dob.getData(); |
---|
311 | comparator = WritableComparator.get(Text.class); |
---|
312 | } |
---|
313 | |
---|
314 | public long getSeed() { |
---|
315 | return seed; |
---|
316 | } |
---|
317 | |
---|
318 | private static void genRandom(Text t, int len, StringBuffer sb) { |
---|
319 | sb.setLength(0); |
---|
320 | for (int i = 0; i < len; ++i) { |
---|
321 | sb.append(Integer.toString(r.nextInt(26) + 10, 36)); |
---|
322 | } |
---|
323 | t.set(sb.toString()); |
---|
324 | } |
---|
325 | |
---|
326 | public int compare(int i, int j) { |
---|
327 | final int ii = indices[i]; |
---|
328 | final int ij = indices[j]; |
---|
329 | return comparator.compare(bytes, offsets[ii], |
---|
330 | ((ii + 1 == indices.length) ? eob : offsets[ii + 1]) - offsets[ii], |
---|
331 | bytes, offsets[ij], |
---|
332 | ((ij + 1 == indices.length) ? eob : offsets[ij + 1]) - offsets[ij]); |
---|
333 | } |
---|
334 | |
---|
335 | public void swap(int i, int j) { |
---|
336 | int tmp = indices[i]; |
---|
337 | indices[i] = indices[j]; |
---|
338 | indices[j] = tmp; |
---|
339 | } |
---|
340 | |
---|
341 | public String[] getValues() { |
---|
342 | return check; |
---|
343 | } |
---|
344 | |
---|
345 | public String[] getSorted() throws IOException { |
---|
346 | String[] ret = new String[indices.length]; |
---|
347 | Text t = new Text(); |
---|
348 | DataInputBuffer dib = new DataInputBuffer(); |
---|
349 | for (int i = 0; i < ret.length; ++i) { |
---|
350 | int ii = indices[i]; |
---|
351 | dib.reset(bytes, offsets[ii], |
---|
352 | ((ii + 1 == indices.length) ? eob : offsets[ii + 1]) - offsets[ii]); |
---|
353 | t.readFields(dib); |
---|
354 | ret[i] = t.toString(); |
---|
355 | } |
---|
356 | return ret; |
---|
357 | } |
---|
358 | |
---|
359 | } |
---|
360 | |
---|
361 | } |
---|