[120] | 1 | /** |
---|
| 2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
---|
| 3 | * contributor license agreements. See the NOTICE file distributed with this |
---|
| 4 | * work for additional information regarding copyright ownership. The ASF |
---|
| 5 | * licenses this file to you under the Apache License, Version 2.0 (the |
---|
| 6 | * "License"); you may not use this file except in compliance with the License. |
---|
| 7 | * You may obtain a copy of the License at |
---|
| 8 | * |
---|
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
| 10 | * |
---|
| 11 | * Unless required by applicable law or agreed to in writing, software |
---|
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
---|
| 13 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
---|
| 14 | * License for the specific language governing permissions and limitations under |
---|
| 15 | * the License. |
---|
| 16 | */ |
---|
| 17 | |
---|
| 18 | package org.apache.hadoop.io.file.tfile; |
---|
| 19 | |
---|
| 20 | import java.io.IOException; |
---|
| 21 | import java.text.DateFormat; |
---|
| 22 | import java.text.SimpleDateFormat; |
---|
| 23 | import java.util.Random; |
---|
| 24 | import java.util.StringTokenizer; |
---|
| 25 | |
---|
| 26 | import junit.framework.TestCase; |
---|
| 27 | |
---|
| 28 | import org.apache.commons.cli.CommandLine; |
---|
| 29 | import org.apache.commons.cli.CommandLineParser; |
---|
| 30 | import org.apache.commons.cli.GnuParser; |
---|
| 31 | import org.apache.commons.cli.HelpFormatter; |
---|
| 32 | import org.apache.commons.cli.Option; |
---|
| 33 | import org.apache.commons.cli.OptionBuilder; |
---|
| 34 | import org.apache.commons.cli.Options; |
---|
| 35 | import org.apache.commons.cli.ParseException; |
---|
| 36 | import org.apache.hadoop.conf.Configuration; |
---|
| 37 | import org.apache.hadoop.fs.FSDataInputStream; |
---|
| 38 | import org.apache.hadoop.fs.FSDataOutputStream; |
---|
| 39 | import org.apache.hadoop.fs.FileSystem; |
---|
| 40 | import org.apache.hadoop.fs.Path; |
---|
| 41 | import org.apache.hadoop.io.BytesWritable; |
---|
| 42 | import org.apache.hadoop.io.SequenceFile; |
---|
| 43 | import org.apache.hadoop.io.compress.CompressionCodec; |
---|
| 44 | import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner.Entry; |
---|
| 45 | |
---|
| 46 | public class TestTFileSeqFileComparison extends TestCase { |
---|
| 47 | MyOptions options; |
---|
| 48 | |
---|
| 49 | private FileSystem fs; |
---|
| 50 | private Configuration conf; |
---|
| 51 | private long startTimeEpoch; |
---|
| 52 | private long finishTimeEpoch; |
---|
| 53 | private DateFormat formatter; |
---|
| 54 | byte[][] dictionary; |
---|
| 55 | |
---|
| 56 | @Override |
---|
| 57 | public void setUp() throws IOException { |
---|
| 58 | if (options == null) { |
---|
| 59 | options = new MyOptions(new String[0]); |
---|
| 60 | } |
---|
| 61 | |
---|
| 62 | conf = new Configuration(); |
---|
| 63 | conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize); |
---|
| 64 | conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize); |
---|
| 65 | Path path = new Path(options.rootDir); |
---|
| 66 | fs = path.getFileSystem(conf); |
---|
| 67 | formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
---|
| 68 | setUpDictionary(); |
---|
| 69 | } |
---|
| 70 | |
---|
| 71 | private void setUpDictionary() { |
---|
| 72 | Random rng = new Random(); |
---|
| 73 | dictionary = new byte[options.dictSize][]; |
---|
| 74 | for (int i = 0; i < options.dictSize; ++i) { |
---|
| 75 | int len = |
---|
| 76 | rng.nextInt(options.maxWordLen - options.minWordLen) |
---|
| 77 | + options.minWordLen; |
---|
| 78 | dictionary[i] = new byte[len]; |
---|
| 79 | rng.nextBytes(dictionary[i]); |
---|
| 80 | } |
---|
| 81 | } |
---|
| 82 | |
---|
| 83 | @Override |
---|
| 84 | public void tearDown() throws IOException { |
---|
| 85 | // do nothing |
---|
| 86 | } |
---|
| 87 | |
---|
| 88 | public void startTime() throws IOException { |
---|
| 89 | startTimeEpoch = System.currentTimeMillis(); |
---|
| 90 | System.out.println(formatTime() + " Started timing."); |
---|
| 91 | } |
---|
| 92 | |
---|
| 93 | public void stopTime() throws IOException { |
---|
| 94 | finishTimeEpoch = System.currentTimeMillis(); |
---|
| 95 | System.out.println(formatTime() + " Stopped timing."); |
---|
| 96 | } |
---|
| 97 | |
---|
| 98 | public long getIntervalMillis() throws IOException { |
---|
| 99 | return finishTimeEpoch - startTimeEpoch; |
---|
| 100 | } |
---|
| 101 | |
---|
| 102 | public void printlnWithTimestamp(String message) throws IOException { |
---|
| 103 | System.out.println(formatTime() + " " + message); |
---|
| 104 | } |
---|
| 105 | |
---|
| 106 | /* |
---|
| 107 | * Format millis into minutes and seconds. |
---|
| 108 | */ |
---|
| 109 | public String formatTime(long milis) { |
---|
| 110 | return formatter.format(milis); |
---|
| 111 | } |
---|
| 112 | |
---|
| 113 | public String formatTime() { |
---|
| 114 | return formatTime(System.currentTimeMillis()); |
---|
| 115 | } |
---|
| 116 | |
---|
| 117 | private interface KVAppendable { |
---|
| 118 | public void append(BytesWritable key, BytesWritable value) |
---|
| 119 | throws IOException; |
---|
| 120 | |
---|
| 121 | public void close() throws IOException; |
---|
| 122 | } |
---|
| 123 | |
---|
| 124 | private interface KVReadable { |
---|
| 125 | public byte[] getKey(); |
---|
| 126 | |
---|
| 127 | public byte[] getValue(); |
---|
| 128 | |
---|
| 129 | public int getKeyLength(); |
---|
| 130 | |
---|
| 131 | public int getValueLength(); |
---|
| 132 | |
---|
| 133 | public boolean next() throws IOException; |
---|
| 134 | |
---|
| 135 | public void close() throws IOException; |
---|
| 136 | } |
---|
| 137 | |
---|
| 138 | static class TFileAppendable implements KVAppendable { |
---|
| 139 | private FSDataOutputStream fsdos; |
---|
| 140 | private TFile.Writer writer; |
---|
| 141 | |
---|
| 142 | public TFileAppendable(FileSystem fs, Path path, String compress, |
---|
| 143 | int minBlkSize, int osBufferSize, Configuration conf) |
---|
| 144 | throws IOException { |
---|
| 145 | this.fsdos = fs.create(path, true, osBufferSize); |
---|
| 146 | this.writer = new TFile.Writer(fsdos, minBlkSize, compress, null, conf); |
---|
| 147 | } |
---|
| 148 | |
---|
| 149 | public void append(BytesWritable key, BytesWritable value) |
---|
| 150 | throws IOException { |
---|
| 151 | writer.append(key.get(), 0, key.getSize(), value.get(), 0, value |
---|
| 152 | .getSize()); |
---|
| 153 | } |
---|
| 154 | |
---|
| 155 | public void close() throws IOException { |
---|
| 156 | writer.close(); |
---|
| 157 | fsdos.close(); |
---|
| 158 | } |
---|
| 159 | } |
---|
| 160 | |
---|
| 161 | static class TFileReadable implements KVReadable { |
---|
| 162 | private FSDataInputStream fsdis; |
---|
| 163 | private TFile.Reader reader; |
---|
| 164 | private TFile.Reader.Scanner scanner; |
---|
| 165 | private byte[] keyBuffer; |
---|
| 166 | private int keyLength; |
---|
| 167 | private byte[] valueBuffer; |
---|
| 168 | private int valueLength; |
---|
| 169 | |
---|
| 170 | public TFileReadable(FileSystem fs, Path path, int osBufferSize, |
---|
| 171 | Configuration conf) throws IOException { |
---|
| 172 | this.fsdis = fs.open(path, osBufferSize); |
---|
| 173 | this.reader = |
---|
| 174 | new TFile.Reader(fsdis, fs.getFileStatus(path).getLen(), conf); |
---|
| 175 | this.scanner = reader.createScanner(); |
---|
| 176 | keyBuffer = new byte[32]; |
---|
| 177 | valueBuffer = new byte[32]; |
---|
| 178 | } |
---|
| 179 | |
---|
| 180 | private void checkKeyBuffer(int size) { |
---|
| 181 | if (size <= keyBuffer.length) { |
---|
| 182 | return; |
---|
| 183 | } |
---|
| 184 | keyBuffer = |
---|
| 185 | new byte[Math.max(2 * keyBuffer.length, 2 * size - keyBuffer.length)]; |
---|
| 186 | } |
---|
| 187 | |
---|
| 188 | private void checkValueBuffer(int size) { |
---|
| 189 | if (size <= valueBuffer.length) { |
---|
| 190 | return; |
---|
| 191 | } |
---|
| 192 | valueBuffer = |
---|
| 193 | new byte[Math.max(2 * valueBuffer.length, 2 * size |
---|
| 194 | - valueBuffer.length)]; |
---|
| 195 | } |
---|
| 196 | |
---|
| 197 | public byte[] getKey() { |
---|
| 198 | return keyBuffer; |
---|
| 199 | } |
---|
| 200 | |
---|
| 201 | public int getKeyLength() { |
---|
| 202 | return keyLength; |
---|
| 203 | } |
---|
| 204 | |
---|
| 205 | public byte[] getValue() { |
---|
| 206 | return valueBuffer; |
---|
| 207 | } |
---|
| 208 | |
---|
| 209 | public int getValueLength() { |
---|
| 210 | return valueLength; |
---|
| 211 | } |
---|
| 212 | |
---|
| 213 | public boolean next() throws IOException { |
---|
| 214 | if (scanner.atEnd()) return false; |
---|
| 215 | Entry entry = scanner.entry(); |
---|
| 216 | keyLength = entry.getKeyLength(); |
---|
| 217 | checkKeyBuffer(keyLength); |
---|
| 218 | entry.getKey(keyBuffer); |
---|
| 219 | valueLength = entry.getValueLength(); |
---|
| 220 | checkValueBuffer(valueLength); |
---|
| 221 | entry.getValue(valueBuffer); |
---|
| 222 | scanner.advance(); |
---|
| 223 | return true; |
---|
| 224 | } |
---|
| 225 | |
---|
| 226 | public void close() throws IOException { |
---|
| 227 | scanner.close(); |
---|
| 228 | reader.close(); |
---|
| 229 | fsdis.close(); |
---|
| 230 | } |
---|
| 231 | } |
---|
| 232 | |
---|
| 233 | static class SeqFileAppendable implements KVAppendable { |
---|
| 234 | private FSDataOutputStream fsdos; |
---|
| 235 | private SequenceFile.Writer writer; |
---|
| 236 | |
---|
| 237 | public SeqFileAppendable(FileSystem fs, Path path, int osBufferSize, |
---|
| 238 | String compress, int minBlkSize) throws IOException { |
---|
| 239 | Configuration conf = new Configuration(); |
---|
| 240 | conf.setBoolean("hadoop.native.lib", true); |
---|
| 241 | |
---|
| 242 | CompressionCodec codec = null; |
---|
| 243 | if ("lzo".equals(compress)) { |
---|
| 244 | codec = Compression.Algorithm.LZO.getCodec(); |
---|
| 245 | } |
---|
| 246 | else if ("gz".equals(compress)) { |
---|
| 247 | codec = Compression.Algorithm.GZ.getCodec(); |
---|
| 248 | } |
---|
| 249 | else if (!"none".equals(compress)) |
---|
| 250 | throw new IOException("Codec not supported."); |
---|
| 251 | |
---|
| 252 | this.fsdos = fs.create(path, true, osBufferSize); |
---|
| 253 | |
---|
| 254 | if (!"none".equals(compress)) { |
---|
| 255 | writer = |
---|
| 256 | SequenceFile.createWriter(conf, fsdos, BytesWritable.class, |
---|
| 257 | BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec); |
---|
| 258 | } |
---|
| 259 | else { |
---|
| 260 | writer = |
---|
| 261 | SequenceFile.createWriter(conf, fsdos, BytesWritable.class, |
---|
| 262 | BytesWritable.class, SequenceFile.CompressionType.NONE, null); |
---|
| 263 | } |
---|
| 264 | } |
---|
| 265 | |
---|
| 266 | public void append(BytesWritable key, BytesWritable value) |
---|
| 267 | throws IOException { |
---|
| 268 | writer.append(key, value); |
---|
| 269 | } |
---|
| 270 | |
---|
| 271 | public void close() throws IOException { |
---|
| 272 | writer.close(); |
---|
| 273 | fsdos.close(); |
---|
| 274 | } |
---|
| 275 | } |
---|
| 276 | |
---|
| 277 | static class SeqFileReadable implements KVReadable { |
---|
| 278 | private SequenceFile.Reader reader; |
---|
| 279 | private BytesWritable key; |
---|
| 280 | private BytesWritable value; |
---|
| 281 | |
---|
| 282 | public SeqFileReadable(FileSystem fs, Path path, int osBufferSize) |
---|
| 283 | throws IOException { |
---|
| 284 | Configuration conf = new Configuration(); |
---|
| 285 | conf.setInt("io.file.buffer.size", osBufferSize); |
---|
| 286 | reader = new SequenceFile.Reader(fs, path, conf); |
---|
| 287 | key = new BytesWritable(); |
---|
| 288 | value = new BytesWritable(); |
---|
| 289 | } |
---|
| 290 | |
---|
| 291 | public byte[] getKey() { |
---|
| 292 | return key.get(); |
---|
| 293 | } |
---|
| 294 | |
---|
| 295 | public int getKeyLength() { |
---|
| 296 | return key.getSize(); |
---|
| 297 | } |
---|
| 298 | |
---|
| 299 | public byte[] getValue() { |
---|
| 300 | return value.get(); |
---|
| 301 | } |
---|
| 302 | |
---|
| 303 | public int getValueLength() { |
---|
| 304 | return value.getSize(); |
---|
| 305 | } |
---|
| 306 | |
---|
| 307 | public boolean next() throws IOException { |
---|
| 308 | return reader.next(key, value); |
---|
| 309 | } |
---|
| 310 | |
---|
| 311 | public void close() throws IOException { |
---|
| 312 | reader.close(); |
---|
| 313 | } |
---|
| 314 | } |
---|
| 315 | |
---|
| 316 | private void reportStats(Path path, long totalBytes) throws IOException { |
---|
| 317 | long duration = getIntervalMillis(); |
---|
| 318 | long fsize = fs.getFileStatus(path).getLen(); |
---|
| 319 | printlnWithTimestamp(String.format( |
---|
| 320 | "Duration: %dms...total size: %.2fMB...raw thrpt: %.2fMB/s", duration, |
---|
| 321 | (double) totalBytes / 1024 / 1024, (double) totalBytes / duration |
---|
| 322 | * 1000 / 1024 / 1024)); |
---|
| 323 | printlnWithTimestamp(String.format( |
---|
| 324 | "Compressed size: %.2fMB...compressed thrpt: %.2fMB/s.", |
---|
| 325 | (double) fsize / 1024 / 1024, (double) fsize / duration * 1000 / 1024 |
---|
| 326 | / 1024)); |
---|
| 327 | } |
---|
| 328 | |
---|
| 329 | private void fillBuffer(Random rng, BytesWritable bw, byte[] tmp, int len) { |
---|
| 330 | int n = 0; |
---|
| 331 | while (n < len) { |
---|
| 332 | byte[] word = dictionary[rng.nextInt(dictionary.length)]; |
---|
| 333 | int l = Math.min(word.length, len - n); |
---|
| 334 | System.arraycopy(word, 0, tmp, n, l); |
---|
| 335 | n += l; |
---|
| 336 | } |
---|
| 337 | bw.set(tmp, 0, len); |
---|
| 338 | } |
---|
| 339 | |
---|
| 340 | private void timeWrite(Path path, KVAppendable appendable, int baseKlen, |
---|
| 341 | int baseVlen, long fileSize) throws IOException { |
---|
| 342 | int maxKlen = baseKlen * 2; |
---|
| 343 | int maxVlen = baseVlen * 2; |
---|
| 344 | BytesWritable key = new BytesWritable(); |
---|
| 345 | BytesWritable value = new BytesWritable(); |
---|
| 346 | byte[] keyBuffer = new byte[maxKlen]; |
---|
| 347 | byte[] valueBuffer = new byte[maxVlen]; |
---|
| 348 | Random rng = new Random(options.seed); |
---|
| 349 | long totalBytes = 0; |
---|
| 350 | printlnWithTimestamp("Start writing: " + path.getName() + "..."); |
---|
| 351 | startTime(); |
---|
| 352 | |
---|
| 353 | for (long i = 0; true; ++i) { |
---|
| 354 | if (i % 1000 == 0) { // test the size for every 1000 rows. |
---|
| 355 | if (fs.getFileStatus(path).getLen() >= fileSize) { |
---|
| 356 | break; |
---|
| 357 | } |
---|
| 358 | } |
---|
| 359 | int klen = rng.nextInt(baseKlen) + baseKlen; |
---|
| 360 | int vlen = rng.nextInt(baseVlen) + baseVlen; |
---|
| 361 | fillBuffer(rng, key, keyBuffer, klen); |
---|
| 362 | fillBuffer(rng, value, valueBuffer, vlen); |
---|
| 363 | key.set(keyBuffer, 0, klen); |
---|
| 364 | value.set(valueBuffer, 0, vlen); |
---|
| 365 | appendable.append(key, value); |
---|
| 366 | totalBytes += klen; |
---|
| 367 | totalBytes += vlen; |
---|
| 368 | } |
---|
| 369 | stopTime(); |
---|
| 370 | appendable.close(); |
---|
| 371 | reportStats(path, totalBytes); |
---|
| 372 | } |
---|
| 373 | |
---|
| 374 | private void timeRead(Path path, KVReadable readable) throws IOException { |
---|
| 375 | printlnWithTimestamp("Start reading: " + path.getName() + "..."); |
---|
| 376 | long totalBytes = 0; |
---|
| 377 | startTime(); |
---|
| 378 | for (; readable.next();) { |
---|
| 379 | totalBytes += readable.getKeyLength(); |
---|
| 380 | totalBytes += readable.getValueLength(); |
---|
| 381 | } |
---|
| 382 | stopTime(); |
---|
| 383 | readable.close(); |
---|
| 384 | reportStats(path, totalBytes); |
---|
| 385 | } |
---|
| 386 | |
---|
| 387 | private void createTFile(String parameters, String compress) |
---|
| 388 | throws IOException { |
---|
| 389 | System.out.println("=== TFile: Creation (" + parameters + ") === "); |
---|
| 390 | Path path = new Path(options.rootDir, "TFile.Performance"); |
---|
| 391 | KVAppendable appendable = |
---|
| 392 | new TFileAppendable(fs, path, compress, options.minBlockSize, |
---|
| 393 | options.osOutputBufferSize, conf); |
---|
| 394 | timeWrite(path, appendable, options.keyLength, options.valueLength, |
---|
| 395 | options.fileSize); |
---|
| 396 | } |
---|
| 397 | |
---|
| 398 | private void readTFile(String parameters, boolean delFile) throws IOException { |
---|
| 399 | System.out.println("=== TFile: Reading (" + parameters + ") === "); |
---|
| 400 | { |
---|
| 401 | Path path = new Path(options.rootDir, "TFile.Performance"); |
---|
| 402 | KVReadable readable = |
---|
| 403 | new TFileReadable(fs, path, options.osInputBufferSize, conf); |
---|
| 404 | timeRead(path, readable); |
---|
| 405 | if (delFile) { |
---|
| 406 | if (fs.exists(path)) { |
---|
| 407 | fs.delete(path, true); |
---|
| 408 | } |
---|
| 409 | } |
---|
| 410 | } |
---|
| 411 | } |
---|
| 412 | |
---|
| 413 | private void createSeqFile(String parameters, String compress) |
---|
| 414 | throws IOException { |
---|
| 415 | System.out.println("=== SeqFile: Creation (" + parameters + ") === "); |
---|
| 416 | Path path = new Path(options.rootDir, "SeqFile.Performance"); |
---|
| 417 | KVAppendable appendable = |
---|
| 418 | new SeqFileAppendable(fs, path, options.osOutputBufferSize, compress, |
---|
| 419 | options.minBlockSize); |
---|
| 420 | timeWrite(path, appendable, options.keyLength, options.valueLength, |
---|
| 421 | options.fileSize); |
---|
| 422 | } |
---|
| 423 | |
---|
| 424 | private void readSeqFile(String parameters, boolean delFile) |
---|
| 425 | throws IOException { |
---|
| 426 | System.out.println("=== SeqFile: Reading (" + parameters + ") === "); |
---|
| 427 | Path path = new Path(options.rootDir, "SeqFile.Performance"); |
---|
| 428 | KVReadable readable = |
---|
| 429 | new SeqFileReadable(fs, path, options.osInputBufferSize); |
---|
| 430 | timeRead(path, readable); |
---|
| 431 | if (delFile) { |
---|
| 432 | if (fs.exists(path)) { |
---|
| 433 | fs.delete(path, true); |
---|
| 434 | } |
---|
| 435 | } |
---|
| 436 | } |
---|
| 437 | |
---|
| 438 | private void compareRun(String compress) throws IOException { |
---|
| 439 | String[] supported = TFile.getSupportedCompressionAlgorithms(); |
---|
| 440 | boolean proceed = false; |
---|
| 441 | for (String c : supported) { |
---|
| 442 | if (c.equals(compress)) { |
---|
| 443 | proceed = true; |
---|
| 444 | break; |
---|
| 445 | } |
---|
| 446 | } |
---|
| 447 | |
---|
| 448 | if (!proceed) { |
---|
| 449 | System.out.println("Skipped for " + compress); |
---|
| 450 | return; |
---|
| 451 | } |
---|
| 452 | |
---|
| 453 | options.compress = compress; |
---|
| 454 | String parameters = parameters2String(options); |
---|
| 455 | createSeqFile(parameters, compress); |
---|
| 456 | readSeqFile(parameters, true); |
---|
| 457 | createTFile(parameters, compress); |
---|
| 458 | readTFile(parameters, true); |
---|
| 459 | createTFile(parameters, compress); |
---|
| 460 | readTFile(parameters, true); |
---|
| 461 | createSeqFile(parameters, compress); |
---|
| 462 | readSeqFile(parameters, true); |
---|
| 463 | } |
---|
| 464 | |
---|
| 465 | public void testRunComparisons() throws IOException { |
---|
| 466 | String[] compresses = new String[] { "none", "lzo", "gz" }; |
---|
| 467 | for (String compress : compresses) { |
---|
| 468 | if (compress.equals("none")) { |
---|
| 469 | conf |
---|
| 470 | .setInt("tfile.fs.input.buffer.size", options.fsInputBufferSizeNone); |
---|
| 471 | conf.setInt("tfile.fs.output.buffer.size", |
---|
| 472 | options.fsOutputBufferSizeNone); |
---|
| 473 | } |
---|
| 474 | else if (compress.equals("lzo")) { |
---|
| 475 | conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSizeLzo); |
---|
| 476 | conf.setInt("tfile.fs.output.buffer.size", |
---|
| 477 | options.fsOutputBufferSizeLzo); |
---|
| 478 | } |
---|
| 479 | else { |
---|
| 480 | conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSizeGz); |
---|
| 481 | conf |
---|
| 482 | .setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSizeGz); |
---|
| 483 | } |
---|
| 484 | compareRun(compress); |
---|
| 485 | } |
---|
| 486 | } |
---|
| 487 | |
---|
| 488 | private static String parameters2String(MyOptions options) { |
---|
| 489 | return String |
---|
| 490 | .format( |
---|
| 491 | "KLEN: %d-%d... VLEN: %d-%d...MinBlkSize: %.2fKB...Target Size: %.2fMB...Compression: ...%s", |
---|
| 492 | options.keyLength, options.keyLength * 2, options.valueLength, |
---|
| 493 | options.valueLength * 2, (double) options.minBlockSize / 1024, |
---|
| 494 | (double) options.fileSize / 1024 / 1024, options.compress); |
---|
| 495 | } |
---|
| 496 | |
---|
| 497 | private static class MyOptions { |
---|
| 498 | String rootDir = |
---|
| 499 | System |
---|
| 500 | .getProperty("test.build.data", "/tmp/tfile-test"); |
---|
| 501 | String compress = "gz"; |
---|
| 502 | String format = "tfile"; |
---|
| 503 | int dictSize = 1000; |
---|
| 504 | int minWordLen = 5; |
---|
| 505 | int maxWordLen = 20; |
---|
| 506 | int keyLength = 50; |
---|
| 507 | int valueLength = 100; |
---|
| 508 | int minBlockSize = 256 * 1024; |
---|
| 509 | int fsOutputBufferSize = 1; |
---|
| 510 | int fsInputBufferSize = 0; |
---|
| 511 | // special variable only for unit testing. |
---|
| 512 | int fsInputBufferSizeNone = 0; |
---|
| 513 | int fsInputBufferSizeGz = 0; |
---|
| 514 | int fsInputBufferSizeLzo = 0; |
---|
| 515 | int fsOutputBufferSizeNone = 1; |
---|
| 516 | int fsOutputBufferSizeGz = 1; |
---|
| 517 | int fsOutputBufferSizeLzo = 1; |
---|
| 518 | |
---|
| 519 | // un-exposed parameters. |
---|
| 520 | int osInputBufferSize = 64 * 1024; |
---|
| 521 | int osOutputBufferSize = 64 * 1024; |
---|
| 522 | |
---|
| 523 | long fileSize = 3 * 1024 * 1024; |
---|
| 524 | long seed; |
---|
| 525 | |
---|
| 526 | static final int OP_CREATE = 1; |
---|
| 527 | static final int OP_READ = 2; |
---|
| 528 | int op = OP_READ; |
---|
| 529 | |
---|
| 530 | boolean proceed = false; |
---|
| 531 | |
---|
| 532 | public MyOptions(String[] args) { |
---|
| 533 | seed = System.nanoTime(); |
---|
| 534 | |
---|
| 535 | try { |
---|
| 536 | Options opts = buildOptions(); |
---|
| 537 | CommandLineParser parser = new GnuParser(); |
---|
| 538 | CommandLine line = parser.parse(opts, args, true); |
---|
| 539 | processOptions(line, opts); |
---|
| 540 | validateOptions(); |
---|
| 541 | } |
---|
| 542 | catch (ParseException e) { |
---|
| 543 | System.out.println(e.getMessage()); |
---|
| 544 | System.out.println("Try \"--help\" option for details."); |
---|
| 545 | setStopProceed(); |
---|
| 546 | } |
---|
| 547 | } |
---|
| 548 | |
---|
| 549 | public boolean proceed() { |
---|
| 550 | return proceed; |
---|
| 551 | } |
---|
| 552 | |
---|
| 553 | private Options buildOptions() { |
---|
| 554 | Option compress = |
---|
| 555 | OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz]") |
---|
| 556 | .hasArg().withDescription("compression scheme").create('c'); |
---|
| 557 | |
---|
| 558 | Option ditSize = |
---|
| 559 | OptionBuilder.withLongOpt("dict").withArgName("size").hasArg() |
---|
| 560 | .withDescription("number of dictionary entries").create('d'); |
---|
| 561 | |
---|
| 562 | Option fileSize = |
---|
| 563 | OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB") |
---|
| 564 | .hasArg().withDescription("target size of the file (in MB).") |
---|
| 565 | .create('s'); |
---|
| 566 | |
---|
| 567 | Option format = |
---|
| 568 | OptionBuilder.withLongOpt("format").withArgName("[tfile|seqfile]") |
---|
| 569 | .hasArg().withDescription("choose TFile or SeqFile").create('f'); |
---|
| 570 | |
---|
| 571 | Option fsInputBufferSz = |
---|
| 572 | OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size") |
---|
| 573 | .hasArg().withDescription( |
---|
| 574 | "size of the file system input buffer (in bytes).").create( |
---|
| 575 | 'i'); |
---|
| 576 | |
---|
| 577 | Option fsOutputBufferSize = |
---|
| 578 | OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size") |
---|
| 579 | .hasArg().withDescription( |
---|
| 580 | "size of the file system output buffer (in bytes).").create( |
---|
| 581 | 'o'); |
---|
| 582 | |
---|
| 583 | Option keyLen = |
---|
| 584 | OptionBuilder |
---|
| 585 | .withLongOpt("key-length") |
---|
| 586 | .withArgName("length") |
---|
| 587 | .hasArg() |
---|
| 588 | .withDescription( |
---|
| 589 | "base length of the key (in bytes), actual length varies in [base, 2*base)") |
---|
| 590 | .create('k'); |
---|
| 591 | |
---|
| 592 | Option valueLen = |
---|
| 593 | OptionBuilder |
---|
| 594 | .withLongOpt("value-length") |
---|
| 595 | .withArgName("length") |
---|
| 596 | .hasArg() |
---|
| 597 | .withDescription( |
---|
| 598 | "base length of the value (in bytes), actual length varies in [base, 2*base)") |
---|
| 599 | .create('v'); |
---|
| 600 | |
---|
| 601 | Option wordLen = |
---|
| 602 | OptionBuilder.withLongOpt("word-length").withArgName("min,max") |
---|
| 603 | .hasArg().withDescription( |
---|
| 604 | "range of dictionary word length (in bytes)").create('w'); |
---|
| 605 | |
---|
| 606 | Option blockSz = |
---|
| 607 | OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg() |
---|
| 608 | .withDescription("minimum block size (in KB)").create('b'); |
---|
| 609 | |
---|
| 610 | Option seed = |
---|
| 611 | OptionBuilder.withLongOpt("seed").withArgName("long-int").hasArg() |
---|
| 612 | .withDescription("specify the seed").create('S'); |
---|
| 613 | |
---|
| 614 | Option operation = |
---|
| 615 | OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg() |
---|
| 616 | .withDescription( |
---|
| 617 | "action: read-only, create-only, read-after-create").create( |
---|
| 618 | 'x'); |
---|
| 619 | |
---|
| 620 | Option rootDir = |
---|
| 621 | OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg() |
---|
| 622 | .withDescription( |
---|
| 623 | "specify root directory where files will be created.") |
---|
| 624 | .create('r'); |
---|
| 625 | |
---|
| 626 | Option help = |
---|
| 627 | OptionBuilder.withLongOpt("help").hasArg(false).withDescription( |
---|
| 628 | "show this screen").create("h"); |
---|
| 629 | |
---|
| 630 | return new Options().addOption(compress).addOption(ditSize).addOption( |
---|
| 631 | fileSize).addOption(format).addOption(fsInputBufferSz).addOption( |
---|
| 632 | fsOutputBufferSize).addOption(keyLen).addOption(wordLen).addOption( |
---|
| 633 | blockSz).addOption(rootDir).addOption(valueLen).addOption(operation) |
---|
| 634 | .addOption(help); |
---|
| 635 | |
---|
| 636 | } |
---|
| 637 | |
---|
| 638 | private void processOptions(CommandLine line, Options opts) |
---|
| 639 | throws ParseException { |
---|
| 640 | // --help -h and --version -V must be processed first. |
---|
| 641 | if (line.hasOption('h')) { |
---|
| 642 | HelpFormatter formatter = new HelpFormatter(); |
---|
| 643 | System.out.println("TFile and SeqFile benchmark."); |
---|
| 644 | System.out.println(); |
---|
| 645 | formatter.printHelp(100, |
---|
| 646 | "java ... TestTFileSeqFileComparison [options]", |
---|
| 647 | "\nSupported options:", opts, ""); |
---|
| 648 | return; |
---|
| 649 | } |
---|
| 650 | |
---|
| 651 | if (line.hasOption('c')) { |
---|
| 652 | compress = line.getOptionValue('c'); |
---|
| 653 | } |
---|
| 654 | |
---|
| 655 | if (line.hasOption('d')) { |
---|
| 656 | dictSize = Integer.parseInt(line.getOptionValue('d')); |
---|
| 657 | } |
---|
| 658 | |
---|
| 659 | if (line.hasOption('s')) { |
---|
| 660 | fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024; |
---|
| 661 | } |
---|
| 662 | |
---|
| 663 | if (line.hasOption('f')) { |
---|
| 664 | format = line.getOptionValue('f'); |
---|
| 665 | } |
---|
| 666 | |
---|
| 667 | if (line.hasOption('i')) { |
---|
| 668 | fsInputBufferSize = Integer.parseInt(line.getOptionValue('i')); |
---|
| 669 | } |
---|
| 670 | |
---|
| 671 | if (line.hasOption('o')) { |
---|
| 672 | fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o')); |
---|
| 673 | } |
---|
| 674 | |
---|
| 675 | if (line.hasOption('k')) { |
---|
| 676 | keyLength = Integer.parseInt(line.getOptionValue('k')); |
---|
| 677 | } |
---|
| 678 | |
---|
| 679 | if (line.hasOption('v')) { |
---|
| 680 | valueLength = Integer.parseInt(line.getOptionValue('v')); |
---|
| 681 | } |
---|
| 682 | |
---|
| 683 | if (line.hasOption('b')) { |
---|
| 684 | minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024; |
---|
| 685 | } |
---|
| 686 | |
---|
| 687 | if (line.hasOption('r')) { |
---|
| 688 | rootDir = line.getOptionValue('r'); |
---|
| 689 | } |
---|
| 690 | |
---|
| 691 | if (line.hasOption('S')) { |
---|
| 692 | seed = Long.parseLong(line.getOptionValue('S')); |
---|
| 693 | } |
---|
| 694 | |
---|
| 695 | if (line.hasOption('w')) { |
---|
| 696 | String min_max = line.getOptionValue('w'); |
---|
| 697 | StringTokenizer st = new StringTokenizer(min_max, " \t,"); |
---|
| 698 | if (st.countTokens() != 2) { |
---|
| 699 | throw new ParseException("Bad word length specification: " + min_max); |
---|
| 700 | } |
---|
| 701 | minWordLen = Integer.parseInt(st.nextToken()); |
---|
| 702 | maxWordLen = Integer.parseInt(st.nextToken()); |
---|
| 703 | } |
---|
| 704 | |
---|
| 705 | if (line.hasOption('x')) { |
---|
| 706 | String strOp = line.getOptionValue('x'); |
---|
| 707 | if (strOp.equals("r")) { |
---|
| 708 | op = OP_READ; |
---|
| 709 | } |
---|
| 710 | else if (strOp.equals("w")) { |
---|
| 711 | op = OP_CREATE; |
---|
| 712 | } |
---|
| 713 | else if (strOp.equals("rw")) { |
---|
| 714 | op = OP_CREATE | OP_READ; |
---|
| 715 | } |
---|
| 716 | else { |
---|
| 717 | throw new ParseException("Unknown action specifier: " + strOp); |
---|
| 718 | } |
---|
| 719 | } |
---|
| 720 | |
---|
| 721 | proceed = true; |
---|
| 722 | } |
---|
| 723 | |
---|
| 724 | private void validateOptions() throws ParseException { |
---|
| 725 | if (!compress.equals("none") && !compress.equals("lzo") |
---|
| 726 | && !compress.equals("gz")) { |
---|
| 727 | throw new ParseException("Unknown compression scheme: " + compress); |
---|
| 728 | } |
---|
| 729 | |
---|
| 730 | if (!format.equals("tfile") && !format.equals("seqfile")) { |
---|
| 731 | throw new ParseException("Unknown file format: " + format); |
---|
| 732 | } |
---|
| 733 | |
---|
| 734 | if (minWordLen >= maxWordLen) { |
---|
| 735 | throw new ParseException( |
---|
| 736 | "Max word length must be greater than min word length."); |
---|
| 737 | } |
---|
| 738 | return; |
---|
| 739 | } |
---|
| 740 | |
---|
| 741 | private void setStopProceed() { |
---|
| 742 | proceed = false; |
---|
| 743 | } |
---|
| 744 | |
---|
| 745 | public boolean doCreate() { |
---|
| 746 | return (op & OP_CREATE) != 0; |
---|
| 747 | } |
---|
| 748 | |
---|
| 749 | public boolean doRead() { |
---|
| 750 | return (op & OP_READ) != 0; |
---|
| 751 | } |
---|
| 752 | } |
---|
| 753 | |
---|
| 754 | public static void main(String[] args) throws IOException { |
---|
| 755 | TestTFileSeqFileComparison testCase = new TestTFileSeqFileComparison(); |
---|
| 756 | MyOptions options = new MyOptions(args); |
---|
| 757 | if (options.proceed == false) { |
---|
| 758 | return; |
---|
| 759 | } |
---|
| 760 | testCase.options = options; |
---|
| 761 | String parameters = parameters2String(options); |
---|
| 762 | |
---|
| 763 | testCase.setUp(); |
---|
| 764 | if (testCase.options.format.equals("tfile")) { |
---|
| 765 | if (options.doCreate()) { |
---|
| 766 | testCase.createTFile(parameters, options.compress); |
---|
| 767 | } |
---|
| 768 | if (options.doRead()) { |
---|
| 769 | testCase.readTFile(parameters, options.doCreate()); |
---|
| 770 | } |
---|
| 771 | } |
---|
| 772 | else { |
---|
| 773 | if (options.doCreate()) { |
---|
| 774 | testCase.createSeqFile(parameters, options.compress); |
---|
| 775 | } |
---|
| 776 | if (options.doRead()) { |
---|
| 777 | testCase.readSeqFile(parameters, options.doCreate()); |
---|
| 778 | } |
---|
| 779 | } |
---|
| 780 | testCase.tearDown(); |
---|
| 781 | } |
---|
| 782 | } |
---|