Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

generateGridmix2data.sh @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago
Added the main files for the Hadoop JUnit Project
Property svn:executable set to *
File size: 3.6 KB

Rev	Line
#!/usr/bin/env bash

##############################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#####################################################################

# Generates the GridMix2 input data sets by launching three
# "randomtextwriter" jobs from ${EXAMPLE_JAR} in parallel, then waits for
# all of them and exits non-zero if any job failed.
#
# Environment:
#   HADOOP_HOME       - required; ${HADOOP_HOME}/bin/hadoop is executed.
#   EXAMPLE_JAR       - required; jar passed to "hadoop jar".
#                       (Both are presumably provided by gridmix-env-2,
#                       which is sourced below -- confirm against that file.)
#   USE_REAL_DATASET  - optional; any non-empty value selects the bigger
#                       (real) dataset parameters.

GRID_DIR=$(dirname "$0")
GRID_DIR=$(cd "$GRID_DIR" && pwd) || {
  echo "Cannot resolve the directory containing $0" >&2
  exit 1
}
# Quoted so that an install path containing spaces still works.
source "$GRID_DIR/gridmix-env-2"

# Fail early with a clear message instead of trying to run "/bin/hadoop"
# or passing an empty jar name.
: "${HADOOP_HOME:?HADOOP_HOME must be set (normally by gridmix-env-2)}"
: "${EXAMPLE_JAR:?EXAMPLE_JAR must be set (normally by gridmix-env-2)}"

# Smaller data set is used by default.
COMPRESSED_DATA_BYTES=2147483648
UNCOMPRESSED_DATA_BYTES=536870912

# Number of partitions for output data
NUM_MAPS=100

# If the env var USE_REAL_DATASET is set, then use the params to generate
# the bigger (real) dataset. The expansion is quoted and defaulted so that
# values containing whitespace do not break the test.
if [[ -n "${USE_REAL_DATASET:-}" ]]; then
  echo "Using real dataset"
  NUM_MAPS=492
  # 2TB data compressing to approx 500GB
  COMPRESSED_DATA_BYTES=2147483648000
  # 500GB
  UNCOMPRESSED_DATA_BYTES=536870912000
fi

## Data sources
export GRID_MIX_DATA=/gridmix/data
# Variable length key, value compressed SequenceFile
export VARCOMPSEQ=${GRID_MIX_DATA}/WebSimulationBlockCompressed
# Fixed length key, value compressed SequenceFile
export FIXCOMPSEQ=${GRID_MIX_DATA}/MonsterQueryBlockCompressed
# Variable length key, value uncompressed Text File
export VARINFLTEXT=${GRID_MIX_DATA}/SortUncompressed
# Fixed length key, value compressed Text File
# NOTE(review): FIXCOMPTEXT is exported but no job in this script writes
# to it -- confirm whether a fourth generator job is intentionally absent.
export FIXCOMPTEXT=${GRID_MIX_DATA}/EntropySimulationCompressed

#######################################
# Runs one randomtextwriter job in the foreground.
# Globals:   HADOOP_HOME, EXAMPLE_JAR, NUM_MAPS (read)
# Arguments: $1 - total bytes to generate (bytes per map is $1 / NUM_MAPS)
#            $2 - minimum words per key
#            $3 - maximum words per key
#            $4 - minimum words per value
#            $5 - maximum words per value
#            $6 - output format class passed to -outFormat
#            $7 - output directory
#            $8... - extra options placed just before -outFormat
# Returns:   exit status of the hadoop command
#######################################
run_randomtextwriter() {
  local total_bytes=$1
  local min_key=$2 max_key=$3
  local min_value=$4 max_value=$5
  local out_format=$6
  local out_dir=$7
  shift 7

  "${HADOOP_HOME}/bin/hadoop" jar \
    "${EXAMPLE_JAR}" randomtextwriter \
    -D "test.randomtextwrite.total_bytes=${total_bytes}" \
    -D "test.randomtextwrite.bytes_per_map=$((total_bytes / NUM_MAPS))" \
    -D "test.randomtextwrite.min_words_key=${min_key}" \
    -D "test.randomtextwrite.max_words_key=${max_key}" \
    -D "test.randomtextwrite.min_words_value=${min_value}" \
    -D "test.randomtextwrite.max_words_value=${max_value}" \
    "$@" \
    -outFormat "${out_format}" \
    "${out_dir}"
}

# The three generators are independent, so they run concurrently; their
# PIDs are collected so the script can wait for and check each of them.
pids=()

# NOTE(review): the jobs below set mapred.map.output.compression.type,
# which by its name targets map output rather than job output; the
# intended key may be mapred.output.compression.type -- confirm before
# changing, since it affects the generated data.
run_randomtextwriter "${COMPRESSED_DATA_BYTES}" 5 10 100 10000 \
  org.apache.hadoop.mapred.SequenceFileOutputFormat "${VARCOMPSEQ}" \
  -D mapred.output.compress=true \
  -D mapred.map.output.compression.type=BLOCK &
pids+=("$!")

run_randomtextwriter "${COMPRESSED_DATA_BYTES}" 5 5 100 100 \
  org.apache.hadoop.mapred.SequenceFileOutputFormat "${FIXCOMPSEQ}" \
  -D mapred.output.compress=true \
  -D mapred.map.output.compression.type=BLOCK &
pids+=("$!")

run_randomtextwriter "${UNCOMPRESSED_DATA_BYTES}" 1 10 0 200 \
  org.apache.hadoop.mapred.TextOutputFormat "${VARINFLTEXT}" \
  -D mapred.output.compress=false &
pids+=("$!")

# Without this barrier the script would return immediately with status 0,
# even if a job failed or was still running.
status=0
for pid in "${pids[@]}"; do
  wait "${pid}" || status=1
done
exit "${status}"
	93
	94

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: proiecte/HadoopJUnit/hadoop-0.20.1/src/benchmarks/gridmix2/generateGridmix2data.sh @ 120

Download in other formats: