1 | /** |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
---|
3 | * contributor license agreements. See the NOTICE file distributed with this |
---|
4 | * work for additional information regarding copyright ownership. The ASF |
---|
5 | * licenses this file to you under the Apache License, Version 2.0 (the |
---|
6 | * "License"); you may not use this file except in compliance with the License. |
---|
7 | * You may obtain a copy of the License at |
---|
8 | * |
---|
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
10 | * |
---|
11 | * Unless required by applicable law or agreed to in writing, software |
---|
12 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
---|
13 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
---|
14 | * License for the specific language governing permissions and limitations under |
---|
15 | * the License. |
---|
16 | */ |
---|
17 | package org.apache.hadoop.io.file.tfile; |
---|
18 | |
---|
19 | import java.util.Random; |
---|
20 | |
---|
21 | import org.apache.hadoop.io.BytesWritable; |
---|
22 | import org.apache.hadoop.io.WritableComparator; |
---|
23 | import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG; |
---|
24 | |
---|
25 | /** |
---|
26 | * Generate random <key, value> pairs. |
---|
27 | */ |
---|
28 | class KVGenerator { |
---|
29 | private final Random random; |
---|
30 | private final byte[][] dict; |
---|
31 | private final boolean sorted; |
---|
32 | private final DiscreteRNG keyLenRNG, valLenRNG; |
---|
33 | private BytesWritable lastKey; |
---|
34 | private static final int MIN_KEY_LEN = 4; |
---|
35 | private final byte prefix[] = new byte[MIN_KEY_LEN]; |
---|
36 | |
---|
37 | public KVGenerator(Random random, boolean sorted, DiscreteRNG keyLenRNG, |
---|
38 | DiscreteRNG valLenRNG, DiscreteRNG wordLenRNG, int dictSize) { |
---|
39 | this.random = random; |
---|
40 | dict = new byte[dictSize][]; |
---|
41 | this.sorted = sorted; |
---|
42 | this.keyLenRNG = keyLenRNG; |
---|
43 | this.valLenRNG = valLenRNG; |
---|
44 | for (int i = 0; i < dictSize; ++i) { |
---|
45 | int wordLen = wordLenRNG.nextInt(); |
---|
46 | dict[i] = new byte[wordLen]; |
---|
47 | random.nextBytes(dict[i]); |
---|
48 | } |
---|
49 | lastKey = new BytesWritable(); |
---|
50 | fillKey(lastKey); |
---|
51 | } |
---|
52 | |
---|
53 | private void fillKey(BytesWritable o) { |
---|
54 | int len = keyLenRNG.nextInt(); |
---|
55 | if (len < MIN_KEY_LEN) len = MIN_KEY_LEN; |
---|
56 | o.setSize(len); |
---|
57 | int n = MIN_KEY_LEN; |
---|
58 | while (n < len) { |
---|
59 | byte[] word = dict[random.nextInt(dict.length)]; |
---|
60 | int l = Math.min(word.length, len - n); |
---|
61 | System.arraycopy(word, 0, o.get(), n, l); |
---|
62 | n += l; |
---|
63 | } |
---|
64 | if (sorted |
---|
65 | && WritableComparator.compareBytes(lastKey.get(), MIN_KEY_LEN, lastKey |
---|
66 | .getSize() |
---|
67 | - MIN_KEY_LEN, o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN) > 0) { |
---|
68 | incrementPrefix(); |
---|
69 | } |
---|
70 | |
---|
71 | System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN); |
---|
72 | lastKey.set(o); |
---|
73 | } |
---|
74 | |
---|
75 | private void fillValue(BytesWritable o) { |
---|
76 | int len = valLenRNG.nextInt(); |
---|
77 | o.setSize(len); |
---|
78 | int n = 0; |
---|
79 | while (n < len) { |
---|
80 | byte[] word = dict[random.nextInt(dict.length)]; |
---|
81 | int l = Math.min(word.length, len - n); |
---|
82 | System.arraycopy(word, 0, o.get(), n, l); |
---|
83 | n += l; |
---|
84 | } |
---|
85 | } |
---|
86 | |
---|
87 | private void incrementPrefix() { |
---|
88 | for (int i = MIN_KEY_LEN - 1; i >= 0; --i) { |
---|
89 | ++prefix[i]; |
---|
90 | if (prefix[i] != 0) return; |
---|
91 | } |
---|
92 | |
---|
93 | throw new RuntimeException("Prefix overflown"); |
---|
94 | } |
---|
95 | |
---|
96 | public void next(BytesWritable key, BytesWritable value, boolean dupKey) { |
---|
97 | if (dupKey) { |
---|
98 | key.set(lastKey); |
---|
99 | } |
---|
100 | else { |
---|
101 | fillKey(key); |
---|
102 | } |
---|
103 | fillValue(value); |
---|
104 | } |
---|
105 | } |
---|