source: proiecte/HadoopJUnit/hadoop-0.20.1/contrib/hod/support/logcondense.py @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the main files for the Hadoop JUnit Project

  • Property svn:executable set to *
File size: 6.9 KB
#!/bin/sh

#Licensed to the Apache Software Foundation (ASF) under one
#or more contributor license agreements.  See the NOTICE file
#distributed with this work for additional information
#regarding copyright ownership.  The ASF licenses this file
#to you under the Apache License, Version 2.0 (the
#"License"); you may not use this file except in compliance
#with the License.  You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
""":"
work_dir=$(dirname $0)
base_name=$(basename $0)
cd $work_dir

if [ $HOD_PYTHON_HOME ]; then
exec $HOD_PYTHON_HOME -OO -u $base_name ${1+"$@"}
elif [ -e /usr/bin/python ]; then
exec /usr/bin/python -OO -u $base_name ${1+"$@"}
elif [ -e /usr/local/bin/python ]; then
exec /usr/local/bin/python -OO -u $base_name ${1+"$@"}
else
exec python -OO -u $base_name ${1+"$@"}
fi
":"""

from os import popen3
import os, sys
import re
import time
from datetime import datetime
from optparse import OptionParser

myName          = os.path.basename(sys.argv[0])
myName          = re.sub(".*/", "", myName)

reVersion = re.compile(".*(\d+_\d+).*")

VERSION = '$HeadURL: http://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20/src/contrib/hod/support/logcondense.py $'

reMatch = reVersion.match(VERSION)
if reMatch:
    VERSION = reMatch.group(1)
    VERSION = re.sub("_", ".", VERSION)
else:
    VERSION = 'DEV'

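# For example, a checkout whose $HeadURL$ contains a tag such as
# "release-0_20" would report VERSION = "0.20"; the branch URL above has no
# <digits>_<digits> component, so this copy reports "DEV".
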
options = ( {'short'   : "-p",
             'long'    : "--package",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "package",
             'metavar' : " ",
             'default' : 'hadoop',
             'help'    : "Path to the hadoop binary"},

            {'short'   : "-d",
             'long'    : "--days",
             'type'    : "int",
             'action'  : "store",
             'dest'    : "days",
             'metavar' : " ",
             'default' : 7,
             'help'    : "Number of days before logs are deleted"},

            {'short'   : "-c",
             'long'    : "--config",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "config",
             'metavar' : " ",
             'default' : None,
             'help'    : "Config directory for hadoop"},

            {'short'   : "-l",
             'long'    : "--logs",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "log",
             'metavar' : " ",
             'default' : "/user",
             'help'    : "Directory prefix under which logs are stored per user"},

            {'short'   : "-n",
             'long'    : "--dynamicdfs",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "dynamicdfs",
             'metavar' : " ",
             'default' : "false",
             'help'    : "'true' if the cluster is used to bring up dynamic dfs clusters, 'false' otherwise"}
            )
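
# An illustrative invocation (paths are examples, not defaults of the script):
#   logcondense.py -p /home/user/hadoop/bin/hadoop -d 7 \
#                  -c /home/user/hadoop/conf -l /user
# which deletes hod logs older than 7 days under /user/<username>/hod-logs/.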

def getDfsCommand(options, args):
  if options.config is None:
    cmd = options.package + " " + "dfs " + args
  else:
    cmd = options.package + " " + "--config " + options.config + " dfs " + args
  return cmd
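
# For example, with package "hadoop" and config "/home/user/hadoop/conf",
# getDfsCommand(options, "-lsr /user/\*/hod-logs/") returns:
#   hadoop --config /home/user/hadoop/conf dfs -lsr /user/\*/hod-logs/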

def runcondense():
  import shutil

  options = process_args()
  # If the cluster is used to bring up a dynamic dfs, we must leave the
  # NameNode logs in addition to the JobTracker logs; otherwise only the
  # JobTracker logs. Likewise, in the dynamic dfs case we must also consider
  # DataNode logs for deletion.
  filteredNames = ['jobtracker']
  deletedNamePrefixes = ['*-tasktracker-*']
  if options.dynamicdfs == 'true':
    filteredNames.append('namenode')
    deletedNamePrefixes.append('*-datanode-*')

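  # With --dynamicdfs true, files whose names contain "jobtracker" or
  # "namenode" are kept, while "*-tasktracker-*" and "*-datanode-*" files
  # under each stale job's log directory are removed.
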
  filepath = '%s/\*/hod-logs/' % (options.log)
  cmd = getDfsCommand(options, "-lsr " + filepath)
  (stdin, stdout, stderr) = popen3(cmd)
  lastjobid = 'none'
  toPurge = { }
  for line in stdout:
    try:
      m = re.match("^.*\s(.*)\n$", line)
      filename = m.group(1)
      # file name format: <prefix>/<user>/hod-logs/<jobid>/[0-9]*-[jobtracker|tasktracker|datanode|namenode|]-hostname-YYYYMMDDtime-random.tar.gz
      # First strip the prefix. Note that str.lstrip strips a *character set*,
      # not a prefix, so slice off the prefix length instead:
      if filename.startswith(options.log):
        filename = filename[len(options.log):]
        if not filename.startswith('/'):
          filename = '/' + filename
      else:
        continue

      # Now get other details from filename.
      k = re.match("/(.*)/hod-logs/(.*)/.*-.*-([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*$", filename)
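      # e.g. the illustrative name "/alice/hod-logs/123/0-tasktracker-host1-200904011230-5.tar.gz"
      # yields username "alice", jobid "123", and date groups 2009, 04, 01.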
      if k:
        username = k.group(1)
        jobid = k.group(2)
        datetimefile = datetime(int(k.group(3)), int(k.group(4)), int(k.group(5)))
        datetimenow = datetime.utcnow()
        diff = datetimenow - datetimefile
        filedate = k.group(3) + k.group(4) + k.group(5)
        newdate = datetimenow.strftime("%Y%m%d")
        print "%s %s %s %d" % (filename, filedate, newdate, diff.days)

        # if the cluster is used to bring up dynamic dfs, we must also leave NameNode logs.
        foundFilteredName = False
        for name in filteredNames:
          if filename.find(name) >= 0:
            foundFilteredName = True
            break

        if foundFilteredName:
          continue

        if diff.days > options.days:
          desttodel = filename
          if jobid not in toPurge:
            toPurge[jobid] = options.log.rstrip("/") + "/" + username + "/hod-logs/" + jobid
    except Exception, e:
      print >> sys.stderr, e

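  # At this point toPurge maps each jobid with at least one stale file to its
  # log directory, e.g. {"123": "/user/alice/hod-logs/123"} (illustrative).
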
  for job in toPurge.keys():
    try:
      for prefix in deletedNamePrefixes:
        cmd = getDfsCommand(options, "-rm " + toPurge[job] + '/' + prefix)
        print cmd
        ret = os.system(cmd)
        if ret != 0:
          print >> sys.stderr, "Command to delete files failed: " + cmd
    except Exception, e:
      print >> sys.stderr, e


def process_args():
  global options, myName, VERSION

  usage = "usage: %s <ARGS>" % (myName)

  version = "%s %s" % (myName, VERSION)

  argParser = OptionParser(usage=usage, version=version)

  for option_element in options:
    argParser.add_option(option_element['short'], option_element['long'],
                         type=option_element['type'], action=option_element['action'],
                         dest=option_element['dest'], default=option_element['default'],
                         metavar=option_element['metavar'], help=option_element['help'])

  (parsedOptions, args) = argParser.parse_args()

  if not os.path.exists(parsedOptions.package):
    argParser.error("Could not find path to hadoop binary: %s" % parsedOptions.package)
  # config defaults to None, so validate it only when one was supplied.
  if parsedOptions.config is not None and not os.path.exists(parsedOptions.config):
    argParser.error("Could not find config: %s" % parsedOptions.config)
  if parsedOptions.days <= 0:
    argParser.error("Invalid number of days specified, must be > 0: %s" % parsedOptions.days)
  if parsedOptions.dynamicdfs != 'true' and parsedOptions.dynamicdfs != 'false':
    argParser.error("Invalid option for dynamicdfs, must be true or false: %s" % parsedOptions.dynamicdfs)

  return parsedOptions
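
# Deployment note (an assumption, not specified in this file): logcondense.py
# is typically scheduled to run periodically, e.g. via cron, as a user
# permitted to delete files under the log directory. Illustrative entry:
#   0 0 * * * /path/to/logcondense.py -p /path/to/bin/hadoop -d 7 -l /user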


if __name__ == '__main__':
  runcondense()