#!/usr/bin/env bash

# Resolve the absolute directory containing this script, then load the
# shared gridmix environment. gridmix-env is expected to define the
# variables used below (HADOOP_HOME, EXAMPLE_JAR, VARCOMPSEQ, FIXCOMPSEQ,
# VARINFLTEXT, FIXCOMPTEXT) — presumably; confirm against gridmix-env.
#
# $(…) replaces the legacy backticks; the paths are quoted so a script
# location containing spaces still works. `cd … && pwd` (instead of
# `cd …; pwd`) avoids silently falling back to the caller's cwd when the
# cd fails.
GRID_DIR=$(dirname "$0")
GRID_DIR=$(cd "$GRID_DIR" && pwd)
source "$GRID_DIR/gridmix-env"
6 | |
---|
# Smaller data set is used by default.
COMPRESSED_DATA_BYTES=2147483648
UNCOMPRESSED_DATA_BYTES=536870912
INDIRECT_DATA_BYTES=58720256

# Number of partitions for output data. Honor a caller-supplied NUM_MAPS;
# default to 100 when it is unset or empty. This parameter expansion is
# the quoting-safe equivalent of the original `[ -z ${NUM_MAPS} ]` test,
# which only worked by accident when NUM_MAPS was unset (one-argument
# `test` form) and would error on a value containing spaces.
NUM_MAPS=${NUM_MAPS:-100}

# Number of files the indirect data set is split into.
INDIRECT_DATA_FILES=200
18 | |
---|
# If the env var USE_REAL_DATASET is set, then use the params to generate
# the bigger (real) dataset. `[ -n "…" ]` replaces the original unquoted
# `[ ! -z ${USE_REAL_DATASET} ]`, which would error out if the variable
# ever held a value containing whitespace.
if [ -n "${USE_REAL_DATASET}" ] ; then
  echo "Using real dataset"
  # 2TB data compressing to approx 500GB
  COMPRESSED_DATA_BYTES=2147483648000
  # 500GB
  UNCOMPRESSED_DATA_BYTES=536870912000
  # Default approx 70MB per data file, compressed
  INDIRECT_DATA_BYTES=58720256000
fi
29 | |
---|
# Job 1: variable-length keys (5-10 words) and values (100-10000 words),
# COMPRESSED_DATA_BYTES total split across NUM_MAPS maps, written as a
# compressed SequenceFile under ${VARCOMPSEQ}. Launched in the background
# so the four generator jobs run concurrently.
# NOTE(review): this sets mapred.map.output.compression.type while
# enabling *job* output compression — possibly mapred.output.compression.type
# was intended; confirm against the gridmix job configs.
# All expansions are quoted so paths with spaces don't word-split.
"${HADOOP_HOME}/bin/hadoop" jar \
  "${EXAMPLE_JAR}" randomtextwriter \
  -D test.randomtextwrite.total_bytes="${COMPRESSED_DATA_BYTES}" \
  -D test.randomtextwrite.bytes_per_map=$((COMPRESSED_DATA_BYTES / NUM_MAPS)) \
  -D test.randomtextwrite.min_words_key=5 \
  -D test.randomtextwrite.max_words_key=10 \
  -D test.randomtextwrite.min_words_value=100 \
  -D test.randomtextwrite.max_words_value=10000 \
  -D mapred.output.compress=true \
  -D mapred.map.output.compression.type=BLOCK \
  -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat \
  "${VARCOMPSEQ}" &
42 | |
---|
# Job 2: fixed-length keys (exactly 5 words) and values (exactly 100
# words), COMPRESSED_DATA_BYTES total split across NUM_MAPS maps, written
# as a compressed SequenceFile under ${FIXCOMPSEQ}. Runs in the background
# alongside the other generators.
# All expansions are quoted so paths with spaces don't word-split.
"${HADOOP_HOME}/bin/hadoop" jar \
  "${EXAMPLE_JAR}" randomtextwriter \
  -D test.randomtextwrite.total_bytes="${COMPRESSED_DATA_BYTES}" \
  -D test.randomtextwrite.bytes_per_map=$((COMPRESSED_DATA_BYTES / NUM_MAPS)) \
  -D test.randomtextwrite.min_words_key=5 \
  -D test.randomtextwrite.max_words_key=5 \
  -D test.randomtextwrite.min_words_value=100 \
  -D test.randomtextwrite.max_words_value=100 \
  -D mapred.output.compress=true \
  -D mapred.map.output.compression.type=BLOCK \
  -outFormat org.apache.hadoop.mapred.SequenceFileOutputFormat \
  "${FIXCOMPSEQ}" &
55 | |
---|
# Job 3: variable-length keys (1-10 words) and values (0-200 words),
# UNCOMPRESSED_DATA_BYTES total split across NUM_MAPS maps, written as
# plain (uncompressed) text under ${VARINFLTEXT}. Runs in the background
# alongside the other generators.
# All expansions are quoted so paths with spaces don't word-split.
"${HADOOP_HOME}/bin/hadoop" jar \
  "${EXAMPLE_JAR}" randomtextwriter \
  -D test.randomtextwrite.total_bytes="${UNCOMPRESSED_DATA_BYTES}" \
  -D test.randomtextwrite.bytes_per_map=$((UNCOMPRESSED_DATA_BYTES / NUM_MAPS)) \
  -D test.randomtextwrite.min_words_key=1 \
  -D test.randomtextwrite.max_words_key=10 \
  -D test.randomtextwrite.min_words_value=0 \
  -D test.randomtextwrite.max_words_value=200 \
  -D mapred.output.compress=false \
  -outFormat org.apache.hadoop.mapred.TextOutputFormat \
  "${VARINFLTEXT}" &
67 | |
---|
# Job 4: the "indirect" data set — fixed 5-word keys and 20-word values,
# INDIRECT_DATA_BYTES total split across INDIRECT_DATA_FILES maps (one
# output file each, ~70MB compressed by default), written as compressed
# text under ${FIXCOMPTEXT}. Runs in the background alongside the other
# generators.
# All expansions are quoted so paths with spaces don't word-split.
"${HADOOP_HOME}/bin/hadoop" jar \
  "${EXAMPLE_JAR}" randomtextwriter \
  -D test.randomtextwrite.total_bytes="${INDIRECT_DATA_BYTES}" \
  -D test.randomtextwrite.bytes_per_map=$((INDIRECT_DATA_BYTES / INDIRECT_DATA_FILES)) \
  -D test.randomtextwrite.min_words_key=5 \
  -D test.randomtextwrite.max_words_key=5 \
  -D test.randomtextwrite.min_words_value=20 \
  -D test.randomtextwrite.max_words_value=20 \
  -D mapred.output.compress=true \
  -D mapred.map.output.compression.type=BLOCK \
  -outFormat org.apache.hadoop.mapred.TextOutputFormat \
  "${FIXCOMPTEXT}" &