#!/bin/sh

#Licensed to the Apache Software Foundation (ASF) under one
#or more contributor license agreements.  See the NOTICE file
#distributed with this work for additional information
#regarding copyright ownership.  The ASF licenses this file
#to you under the Apache License, Version 2.0 (the
#"License"); you may not use this file except in compliance
#with the License.  You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

# Polyglot header: Python sees the block below as the module docstring;
# /bin/sh sees the first line as a no-op (":") and then executes the
# re-exec logic, replacing itself with a Python interpreter running this
# same file.  Expansions are quoted so paths with spaces work.
""":"
work_dir=$(dirname "$0")
base_name=$(basename "$0")
cd "$work_dir"

# Prefer an explicitly configured interpreter, then the well-known
# locations, and finally whatever "python" resolves to on PATH.
if [ "$HOD_PYTHON_HOME" ]; then
  exec "$HOD_PYTHON_HOME" -OO -u "$base_name" ${1+"$@"}
elif [ -e /usr/bin/python ]; then
  exec /usr/bin/python -OO -u "$base_name" ${1+"$@"}
elif [ -e /usr/local/bin/python ]; then
  exec /usr/local/bin/python -OO -u "$base_name" ${1+"$@"}
else
  exec python -OO -u "$base_name" ${1+"$@"}
fi
":"""
---|
| 33 | |
---|
| 34 | from os import popen3 |
---|
| 35 | import os, sys |
---|
| 36 | import re |
---|
| 37 | import time |
---|
| 38 | from datetime import datetime |
---|
| 39 | from optparse import OptionParser |
---|
| 40 | |
---|
# Name this script was invoked as; used in usage/version strings.
myName = os.path.basename(sys.argv[0])
# basename already strips directories; this also strips any remaining
# forward-slash-separated prefix as a belt-and-braces measure.
myName = re.sub(".*/", "", myName)

# Extracts a "<major>_<minor>" release token (e.g. "0_20") from the SVN
# HeadURL keyword.  Raw string so \d is a regex class, not a string escape.
reVersion = re.compile(r".*(\d+_\d+).*")

VERSION = '$HeadURL: http://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20/src/contrib/hod/support/logcondense.py $'

reMatch = reVersion.match(VERSION)
if reMatch:
  VERSION = reMatch.group(1)
  VERSION = re.sub("_", ".", VERSION)
else:
  # HeadURL carries no <major>_<minor> token (e.g. trunk/dev checkout).
  VERSION = 'DEV'
---|
| 54 | |
---|
def _make_option(short_flag, long_flag, opt_type, action, dest, metavar,
                 default, help_text):
  """Bundle one OptionParser.add_option() spec as a dict."""
  return {'short'   : short_flag,
          'long'    : long_flag,
          'type'    : opt_type,
          'action'  : action,
          'dest'    : dest,
          'metavar' : metavar,
          'default' : default,
          'help'    : help_text}

# Command-line option table consumed by process_args(); one entry per flag,
# mirroring the keyword arguments of OptionParser.add_option().
options = (
  _make_option("-p", "--package", "string", "store", "package", " ",
               'hadoop',
               "Bin file for hadoop"),

  _make_option("-d", "--days", "int", "store", "days", " ",
               7,
               "Number of days before logs are deleted"),

  _make_option("-c", "--config", "string", "store", "config", " ",
               None,
               "config directory for hadoop"),

  _make_option("-l", "--logs", "string", "store", "log", " ",
               "/user",
               "directory prefix under which logs are stored per user"),

  _make_option("-n", "--dynamicdfs", "string", "store", "dynamicdfs", " ",
               "false",
               "'true', if the cluster is used to bring up dynamic dfs clusters, 'false' otherwise"),
)
---|
| 100 | |
---|
def getDfsCommand(options, args):
  """Build the 'hadoop dfs' shell command line for the given dfs args.

  options -- parsed options object; uses options.package (path to the
             hadoop binary) and options.config (config directory, or
             None to let hadoop use its default configuration).
  args    -- dfs subcommand string, e.g. "-lsr /user".
  Returns the complete command as a single string.
  """
  # 'is None' rather than '== None': identity test for the None singleton.
  if options.config is None:
    cmd = options.package + " " + "dfs " + args
  else:
    cmd = options.package + " " + "--config " + options.config + " dfs " + args
  return cmd
---|
| 107 | |
---|
| 108 | def runcondense(): |
---|
| 109 | import shutil |
---|
| 110 | |
---|
| 111 | options = process_args() |
---|
| 112 | # if the cluster is used to bring up dynamic dfs, we must leave NameNode and JobTracker logs, |
---|
| 113 | # otherwise only JobTracker logs. Likewise, in case of dynamic dfs, we must also look for |
---|
| 114 | # deleting datanode logs |
---|
| 115 | filteredNames = ['jobtracker'] |
---|
| 116 | deletedNamePrefixes = ['*-tasktracker-*'] |
---|
| 117 | if options.dynamicdfs == 'true': |
---|
| 118 | filteredNames.append('namenode') |
---|
| 119 | deletedNamePrefixes.append('*-datanode-*') |
---|
| 120 | |
---|
| 121 | filepath = '%s/\*/hod-logs/' % (options.log) |
---|
| 122 | cmd = getDfsCommand(options, "-lsr " + filepath) |
---|
| 123 | (stdin, stdout, stderr) = popen3(cmd) |
---|
| 124 | lastjobid = 'none' |
---|
| 125 | toPurge = { } |
---|
| 126 | for line in stdout: |
---|
| 127 | try: |
---|
| 128 | m = re.match("^.*\s(.*)\n$", line) |
---|
| 129 | filename = m.group(1) |
---|
| 130 | # file name format: <prefix>/<user>/hod-logs/<jobid>/[0-9]*-[jobtracker|tasktracker|datanode|namenode|]-hostname-YYYYMMDDtime-random.tar.gz |
---|
| 131 | # first strip prefix: |
---|
| 132 | if filename.startswith(options.log): |
---|
| 133 | filename = filename.lstrip(options.log) |
---|
| 134 | if not filename.startswith('/'): |
---|
| 135 | filename = '/' + filename |
---|
| 136 | else: |
---|
| 137 | continue |
---|
| 138 | |
---|
| 139 | # Now get other details from filename. |
---|
| 140 | k = re.match("/(.*)/hod-logs/(.*)/.*-.*-([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*$", filename) |
---|
| 141 | if k: |
---|
| 142 | username = k.group(1) |
---|
| 143 | jobid = k.group(2) |
---|
| 144 | datetimefile = datetime(int(k.group(3)), int(k.group(4)), int(k.group(5))) |
---|
| 145 | datetimenow = datetime.utcnow() |
---|
| 146 | diff = datetimenow - datetimefile |
---|
| 147 | filedate = k.group(3) + k.group(4) + k.group(5) |
---|
| 148 | newdate = datetimenow.strftime("%Y%m%d") |
---|
| 149 | print "%s %s %s %d" % (filename, filedate, newdate, diff.days) |
---|
| 150 | |
---|
| 151 | # if the cluster is used to bring up dynamic dfs, we must also leave NameNode logs. |
---|
| 152 | foundFilteredName = False |
---|
| 153 | for name in filteredNames: |
---|
| 154 | if filename.find(name) >= 0: |
---|
| 155 | foundFilteredName = True |
---|
| 156 | break |
---|
| 157 | |
---|
| 158 | if foundFilteredName: |
---|
| 159 | continue |
---|
| 160 | |
---|
| 161 | if (diff.days > options.days): |
---|
| 162 | desttodel = filename |
---|
| 163 | if not toPurge.has_key(jobid): |
---|
| 164 | toPurge[jobid] = options.log.rstrip("/") + "/" + username + "/hod-logs/" + jobid |
---|
| 165 | except Exception, e: |
---|
| 166 | print >> sys.stderr, e |
---|
| 167 | |
---|
| 168 | for job in toPurge.keys(): |
---|
| 169 | try: |
---|
| 170 | for prefix in deletedNamePrefixes: |
---|
| 171 | cmd = getDfsCommand(options, "-rm " + toPurge[job] + '/' + prefix) |
---|
| 172 | print cmd |
---|
| 173 | ret = 0 |
---|
| 174 | ret = os.system(cmd) |
---|
| 175 | if (ret != 0): |
---|
| 176 | print >> sys.stderr, "Command failed to delete file " + cmd |
---|
| 177 | except Exception, e: |
---|
| 178 | print >> sys.stderr, e |
---|
| 179 | |
---|
| 180 | |
---|
def process_args():
  """Parse and validate command-line arguments.

  Builds an OptionParser from the module-level `options` table, then
  validates: the hadoop binary exists, the config directory (if given)
  exists, --days is positive, and --dynamicdfs is 'true' or 'false'.
  Returns the parsed options object; exits via argParser.error() on any
  validation failure.
  """
  global options, myName, VERSION

  usage = "usage: %s <ARGS>" % (myName)

  version = "%s %s" % (myName, VERSION)

  # Pass the full "<name> <version>" banner so --version identifies the
  # script, not just the bare version number.
  argParser = OptionParser(usage=usage, version=version)

  for option_element in options:
    argParser.add_option(option_element['short'], option_element['long'],
                         type=option_element['type'], action=option_element['action'],
                         dest=option_element['dest'], default=option_element['default'],
                         metavar=option_element['metavar'], help=option_element['help'])

  (parsedOptions, args) = argParser.parse_args()

  if not os.path.exists(parsedOptions.package):
    argParser.error("Could not find path to hadoop binary: %s" % parsedOptions.package)
  # --config is optional: None means "use hadoop's default configuration",
  # and os.path.exists(None) would raise TypeError.
  if parsedOptions.config is not None and not os.path.exists(parsedOptions.config):
    argParser.error("Could not find config: %s" % parsedOptions.config)
  if parsedOptions.days <= 0:
    argParser.error("Invalid number of days specified, must be > 0: %s" % parsedOptions.days)
  if parsedOptions.dynamicdfs != 'true' and parsedOptions.dynamicdfs != 'false':
    argParser.error("Invalid option for dynamicdfs, must be true or false: %s" % parsedOptions.dynamicdfs)

  return parsedOptions
---|
| 208 | |
---|
| 209 | |
---|
# Entry point: run the log purge when executed as a script.
if __name__ == '__main__':
  runcondense()
---|
| 212 | |
---|