source: proiecte/HadoopJUnit/hadoop-0.20.1/contrib/hod/support/logcondense.py @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the main files for the Hadoop JUnit Project

  • Property svn:executable set to *
File size: 6.9 KB
#!/bin/sh

#Licensed to the Apache Software Foundation (ASF) under one
#or more contributor license agreements.  See the NOTICE file
#distributed with this work for additional information
#regarding copyright ownership.  The ASF licenses this file
#to you under the Apache License, Version 2.0 (the
#"License"); you may not use this file except in compliance
#with the License.  You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
""":"
work_dir=$(dirname $0)
base_name=$(basename $0)
cd $work_dir

if [ $HOD_PYTHON_HOME ]; then
exec $HOD_PYTHON_HOME -OO -u $base_name ${1+"$@"}
elif [ -e /usr/bin/python ]; then
exec /usr/bin/python -OO -u $base_name ${1+"$@"}
elif [ -e /usr/local/bin/python ]; then
exec /usr/local/bin/python -OO -u $base_name ${1+"$@"}
else
exec python -OO -u $base_name ${1+"$@"}
fi
":"""

from os import popen3
import os, sys
import re
import time
from datetime import datetime
from optparse import OptionParser

myName          = os.path.basename(sys.argv[0])
myName          = re.sub(".*/", "", myName)

reVersion = re.compile(".*(\d+_\d+).*")

VERSION = '$HeadURL: http://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20/src/contrib/hod/support/logcondense.py $'

reMatch = reVersion.match(VERSION)
if reMatch:
    VERSION = reMatch.group(1)
    VERSION = re.sub("_", ".", VERSION)
else:
    VERSION = 'DEV'

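# For example, a checkout whose $HeadURL$ contains a tag such as
# "release-0_20" would report VERSION = "0.20"; the branch URL above has no
# <digits>_<digits> component, so this copy reports "DEV".
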
options = ( {'short'   : "-p",
             'long'    : "--package",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "package",
             'metavar' : " ",
             'default' : 'hadoop',
             'help'    : "Path to the hadoop binary"},

            {'short'   : "-d",
             'long'    : "--days",
             'type'    : "int",
             'action'  : "store",
             'dest'    : "days",
             'metavar' : " ",
             'default' : 7,
             'help'    : "Number of days before logs are deleted"},

            {'short'   : "-c",
             'long'    : "--config",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "config",
             'metavar' : " ",
             'default' : None,
             'help'    : "Config directory for hadoop"},

            {'short'   : "-l",
             'long'    : "--logs",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "log",
             'metavar' : " ",
             'default' : "/user",
             'help'    : "Directory prefix under which logs are stored per user"},

            {'short'   : "-n",
             'long'    : "--dynamicdfs",
             'type'    : "string",
             'action'  : "store",
             'dest'    : "dynamicdfs",
             'metavar' : " ",
             'default' : "false",
             'help'    : "'true' if the cluster is used to bring up dynamic dfs clusters, 'false' otherwise"}
            )
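
# An illustrative invocation (paths are examples, not defaults of the script):
#   logcondense.py -p /home/user/hadoop/bin/hadoop -d 7 \
#                  -c /home/user/hadoop/conf -l /user
# which deletes hod logs older than 7 days under /user/<username>/hod-logs/.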

def getDfsCommand(options, args):
  if options.config is None:
    cmd = options.package + " " + "dfs " + args
  else:
    cmd = options.package + " " + "--config " + options.config + " dfs " + args
  return cmd
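
# For example, with package "hadoop" and config "/home/user/hadoop/conf",
# getDfsCommand(options, "-lsr /user/\*/hod-logs/") returns:
#   hadoop --config /home/user/hadoop/conf dfs -lsr /user/\*/hod-logs/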

def runcondense():
  import shutil

  options = process_args()
  # If the cluster is used to bring up a dynamic dfs, we must leave the
  # NameNode logs in addition to the JobTracker logs; otherwise only the
  # JobTracker logs. Likewise, in the dynamic dfs case we must also consider
  # DataNode logs for deletion.
  filteredNames = ['jobtracker']
  deletedNamePrefixes = ['*-tasktracker-*']
  if options.dynamicdfs == 'true':
    filteredNames.append('namenode')
    deletedNamePrefixes.append('*-datanode-*')

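  # With --dynamicdfs true, files whose names contain "jobtracker" or
  # "namenode" are kept, while "*-tasktracker-*" and "*-datanode-*" files
  # under each stale job's log directory are removed.
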
  filepath = '%s/\*/hod-logs/' % (options.log)
  cmd = getDfsCommand(options, "-lsr " + filepath)
  (stdin, stdout, stderr) = popen3(cmd)
  lastjobid = 'none'
  toPurge = { }
  for line in stdout:
    try:
      m = re.match("^.*\s(.*)\n$", line)
      filename = m.group(1)
      # file name format: <prefix>/<user>/hod-logs/<jobid>/[0-9]*-[jobtracker|tasktracker|datanode|namenode|]-hostname-YYYYMMDDtime-random.tar.gz
      # First strip the prefix. Note that str.lstrip strips a *character set*,
      # not a prefix, so slice off the prefix length instead:
      if filename.startswith(options.log):
        filename = filename[len(options.log):]
        if not filename.startswith('/'):
          filename = '/' + filename
      else:
        continue

      # Now get other details from filename.
      k = re.match("/(.*)/hod-logs/(.*)/.*-.*-([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*$", filename)
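      # e.g. the illustrative name "/alice/hod-logs/123/0-tasktracker-host1-200904011230-5.tar.gz"
      # yields username "alice", jobid "123", and date groups 2009, 04, 01.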
      if k:
        username = k.group(1)
        jobid = k.group(2)
        datetimefile = datetime(int(k.group(3)), int(k.group(4)), int(k.group(5)))
        datetimenow = datetime.utcnow()
        diff = datetimenow - datetimefile
        filedate = k.group(3) + k.group(4) + k.group(5)
        newdate = datetimenow.strftime("%Y%m%d")
        print "%s %s %s %d" % (filename, filedate, newdate, diff.days)

        # if the cluster is used to bring up dynamic dfs, we must also leave NameNode logs.
        foundFilteredName = False
        for name in filteredNames:
          if filename.find(name) >= 0:
            foundFilteredName = True
            break

        if foundFilteredName:
          continue

        if diff.days > options.days:
          desttodel = filename
          if jobid not in toPurge:
            toPurge[jobid] = options.log.rstrip("/") + "/" + username + "/hod-logs/" + jobid
    except Exception, e:
      print >> sys.stderr, e

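  # At this point toPurge maps each jobid with at least one stale file to its
  # log directory, e.g. {"123": "/user/alice/hod-logs/123"} (illustrative).
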
  for job in toPurge.keys():
    try:
      for prefix in deletedNamePrefixes:
        cmd = getDfsCommand(options, "-rm " + toPurge[job] + '/' + prefix)
        print cmd
        ret = os.system(cmd)
        if ret != 0:
          print >> sys.stderr, "Command to delete files failed: " + cmd
    except Exception, e:
      print >> sys.stderr, e


def process_args():
  global options, myName, VERSION

  usage = "usage: %s <ARGS>" % (myName)

  version = "%s %s" % (myName, VERSION)

  argParser = OptionParser(usage=usage, version=version)

  for option_element in options:
    argParser.add_option(option_element['short'], option_element['long'],
                         type=option_element['type'], action=option_element['action'],
                         dest=option_element['dest'], default=option_element['default'],
                         metavar=option_element['metavar'], help=option_element['help'])

  (parsedOptions, args) = argParser.parse_args()

  if not os.path.exists(parsedOptions.package):
    argParser.error("Could not find path to hadoop binary: %s" % parsedOptions.package)
  # config defaults to None, so validate it only when one was supplied.
  if parsedOptions.config is not None and not os.path.exists(parsedOptions.config):
    argParser.error("Could not find config: %s" % parsedOptions.config)
  if parsedOptions.days <= 0:
    argParser.error("Invalid number of days specified, must be > 0: %s" % parsedOptions.days)
  if parsedOptions.dynamicdfs != 'true' and parsedOptions.dynamicdfs != 'false':
    argParser.error("Invalid option for dynamicdfs, must be true or false: %s" % parsedOptions.dynamicdfs)

  return parsedOptions
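
# Deployment note (an assumption, not specified in this file): logcondense.py
# is typically scheduled to run periodically, e.g. via cron, as a user
# permitted to delete files under the log directory. Illustrative entry:
#   0 0 * * * /path/to/logcondense.py -p /path/to/bin/hadoop -d 7 -l /user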


if __name__ == '__main__':
  runcondense()