|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.apache.hadoop.mapred.JobTracker
public class JobTracker
JobTracker is the central location for submitting and tracking MR jobs in a network environment.
Nested Class Summary | |
---|---|
static class |
JobTracker.IllegalStateException
A client tried to submit a job before the Job Tracker was ready. |
static class |
JobTracker.State
|
Field Summary | |
---|---|
static int |
CLUSTER_INCREMENT
|
static long |
COUNTER_UPDATE_INTERVAL
|
static int |
FILE_NOT_FOUND
|
static String |
FOR_REDUCE_TASK
The reduce task number for which this map output is being transferred |
static String |
FROM_MAP_TASK
The map task from which the map output data is being transferred |
static int |
HEARTBEAT_INTERVAL_MIN
|
static org.apache.commons.logging.Log |
LOG
|
static String |
MAP_OUTPUT_LENGTH
The custom http header used for the map output length. |
static String |
RAW_MAP_OUTPUT_LENGTH
The custom http header used for the "raw" map output length. |
static int |
SUCCESS
|
static int |
TRACKERS_OK
|
static int |
UNKNOWN_TASKTRACKER
|
static long |
versionID
version 3 introduced to replace emitHeartbeat/pollForNewTask/pollForTaskWithClosedJob with heartbeat(TaskTrackerStatus, boolean, boolean, boolean, short)
version 4 changed TaskReport for HADOOP-549. |
static long |
versionID
|
static String |
WORKDIR
|
Fields inherited from interface org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol |
---|
versionID |
Method Summary | |
---|---|
Collection<org.apache.hadoop.mapred.TaskTrackerStatus> |
activeTaskTrackers()
Get the active task tracker statuses in the cluster |
void |
addJobInProgressListener(org.apache.hadoop.mapred.JobInProgressListener listener)
Registers a JobInProgressListener for updates from this
TaskTrackerManager . |
Collection<org.apache.hadoop.mapred.TaskTrackerStatus> |
blacklistedTaskTrackers()
Get the blacklisted task tracker statuses in the cluster |
Vector<org.apache.hadoop.mapred.JobInProgress> |
completedJobs()
|
Vector<org.apache.hadoop.mapred.JobInProgress> |
failedJobs()
|
void |
failJob(org.apache.hadoop.mapred.JobInProgress job)
Fail a job and inform the listeners. |
static InetSocketAddress |
getAddress(Configuration conf)
|
JobStatus[] |
getAllJobs()
Get all the jobs submitted. |
String |
getAssignedTracker(TaskAttemptID taskId)
Get tracker name for a given task id. |
String |
getBuildVersion()
Returns the buildVersion of the JobTracker |
TaskReport[] |
getCleanupTaskReports(JobID jobid)
Grab a bunch of info on the cleanup tasks that make up the job |
ClusterStatus |
getClusterStatus()
Deprecated. use getClusterStatus(boolean) |
ClusterStatus |
getClusterStatus(boolean detailed)
Get the current status of the cluster |
String |
getFilesystemName()
Grab the local fs name |
int |
getInfoPort()
|
static Class<? extends org.apache.hadoop.mapred.JobTrackerInstrumentation> |
getInstrumentationClass(Configuration conf)
|
org.apache.hadoop.mapred.JobInProgress |
getJob(JobID jobid)
Obtain the job object identified by jobid |
Counters |
getJobCounters(JobID jobid)
Grab the current job counters |
JobProfile |
getJobProfile(JobID jobid)
Grab a handle to a job that is already known to the JobTracker. |
JobStatus[] |
getJobsFromQueue(String queue)
Gets all the jobs submitted to the particular Queue |
JobStatus |
getJobStatus(JobID jobid)
Grab a handle to a job that is already known to the JobTracker. |
String |
getJobTrackerMachine()
|
static String |
getLocalJobFilePath(JobID jobId)
Get the localized job file path on the job trackers local file system |
TaskReport[] |
getMapTaskReports(JobID jobid)
Grab a bunch of info on the map tasks that make up the job |
JobID |
getNewJobId()
Allocates a new JobId string. |
int |
getNextHeartbeatInterval()
Calculates next heartbeat interval using cluster size. |
Node |
getNode(String name)
Return the Node in the network topology that corresponds to the hostname |
Collection<Node> |
getNodesAtMaxLevel()
Returns a collection of nodes at the max level |
int |
getNumberOfUniqueHosts()
|
int |
getNumResolvedTaskTrackers()
|
int |
getNumTaskCacheLevels()
|
static Node |
getParentNode(Node node,
int level)
|
long |
getProtocolVersion(String protocol,
long clientVersion)
Return protocol version corresponding to protocol interface. |
JobQueueInfo |
getQueueInfo(String queue)
Gets scheduling information associated with the particular Job queue |
org.apache.hadoop.mapred.QueueManager |
getQueueManager()
Return the QueueManager associated with the JobTracker. |
JobQueueInfo[] |
getQueues()
Gets set of Job Queues associated with the Job Tracker |
long |
getRecoveryDuration()
How long the jobtracker took to recover from restart. |
TaskReport[] |
getReduceTaskReports(JobID jobid)
Grab a bunch of info on the reduce tasks that make up the job |
List<org.apache.hadoop.mapred.JobInProgress> |
getRunningJobs()
Version that is called from a timer thread, and therefore needs to be careful to synchronize. |
TaskReport[] |
getSetupTaskReports(JobID jobid)
Grab a bunch of info on the setup tasks that make up the job |
long |
getStartTime()
|
String |
getSystemDir()
Grab the jobtracker system directory path where job-specific files are to be placed. |
TaskCompletionEvent[] |
getTaskCompletionEvents(JobID jobid,
int fromEventId,
int maxEvents)
Get task completion events for the jobid, starting from fromEventId. |
String[] |
getTaskDiagnostics(TaskAttemptID taskId)
Get the diagnostics for a given task |
org.apache.hadoop.mapred.TaskTrackerStatus |
getTaskTracker(String trackerID)
|
org.apache.hadoop.mapred.TaskInProgress |
getTip(TaskID tipid)
Returns specified TaskInProgress, or null. |
int |
getTotalSubmissions()
|
String |
getTrackerIdentifier()
Get the unique identifier (i.e. timestamp) of this job tracker start. |
int |
getTrackerPort()
|
boolean |
hasRecovered()
Whether the JT has recovered upon restart |
boolean |
hasRestarted()
Whether the JT has restarted |
org.apache.hadoop.mapred.HeartbeatResponse |
heartbeat(org.apache.hadoop.mapred.TaskTrackerStatus status,
boolean restarted,
boolean initialContact,
boolean acceptNewTasks,
short responseId)
The periodic heartbeat mechanism between the TaskTracker and
the JobTracker . |
void |
initJob(org.apache.hadoop.mapred.JobInProgress job)
Initialize the Job |
boolean |
isBlacklisted(String trackerID)
Whether the tracker is blacklisted or not |
JobStatus[] |
jobsToComplete()
Get the jobs that are not completed and not failed |
void |
killJob(JobID jobid)
Kill the indicated job |
boolean |
killTask(TaskAttemptID taskid,
boolean shouldFail)
Mark a Task to be killed |
static void |
main(String[] argv)
Start the JobTracker process. |
void |
offerService()
Run forever |
void |
refreshServiceAcl()
Refresh the service-level authorization policy in-effect. |
void |
removeJobInProgressListener(org.apache.hadoop.mapred.JobInProgressListener listener)
Unregisters a JobInProgressListener from this
TaskTrackerManager . |
void |
reportTaskTrackerError(String taskTracker,
String errorClass,
String errorMessage)
Report a problem to the job tracker. |
Node |
resolveAndAddToTopology(String name)
|
Vector<org.apache.hadoop.mapred.JobInProgress> |
runningJobs()
|
static void |
setInstrumentationClass(Configuration conf,
Class<? extends org.apache.hadoop.mapred.JobTrackerInstrumentation> t)
|
void |
setJobPriority(JobID jobid,
String priority)
Set the priority of a job |
static JobTracker |
startTracker(JobConf conf)
Start the JobTracker with given configuration. |
static JobTracker |
startTracker(JobConf conf,
String identifier)
|
void |
stopTracker()
|
JobStatus |
submitJob(JobID jobId)
JobTracker.submitJob() kicks off a new job. |
List<List<String>> |
taskTrackerNames()
Get the active and blacklisted task tracker names in the cluster. |
Collection<org.apache.hadoop.mapred.TaskTrackerStatus> |
taskTrackers()
Get all the task trackers in the cluster |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final org.apache.commons.logging.Log LOG
public static final int HEARTBEAT_INTERVAL_MIN
public static final int CLUSTER_INCREMENT
public static final long COUNTER_UPDATE_INTERVAL
public static final int SUCCESS
public static final int FILE_NOT_FOUND
public static final String MAP_OUTPUT_LENGTH
public static final String RAW_MAP_OUTPUT_LENGTH
public static final String FROM_MAP_TASK
public static final String FOR_REDUCE_TASK
public static final String WORKDIR
public static final long versionID
version 3 introduced to replace emitHeartbeat/pollForNewTask/pollForTaskWithClosedJob with
heartbeat(TaskTrackerStatus, boolean, boolean, boolean, short)
version 4 changed TaskReport for HADOOP-549.
version 5 introduced that removes locateMapOutputs and instead uses
getTaskCompletionEvents to figure finished maps and fetch the outputs
version 6 adds maxTasks to TaskTrackerStatus for HADOOP-1245
version 7 replaces maxTasks by maxMapTasks and maxReduceTasks in
TaskTrackerStatus for HADOOP-1274
Version 8: HeartbeatResponse is added with the next heartbeat interval.
version 9 changes the counter representation for HADOOP-2248
version 10 changes the TaskStatus representation for HADOOP-2208
version 11 changes string to JobID in getTaskCompletionEvents().
version 12 changes the counters representation for HADOOP-1915
version 13 added call getBuildVersion() for HADOOP-236
Version 14: replaced getFilesystemName with getSystemDir for HADOOP-3135
Version 15: Changed format of Task and TaskStatus for HADOOP-153
Version 16: adds ResourceStatus to TaskTrackerStatus for HADOOP-3759
Version 17: Changed format of Task and TaskStatus for HADOOP-3150
Version 18: Changed status message due to changes in TaskStatus
Version 19: Changed heartbeat to piggyback JobTracker restart information
so that the TaskTracker can synchronize itself.
Version 20: Changed status message due to changes in TaskStatus
(HADOOP-4232)
Version 21: Changed information reported in TaskTrackerStatus'
ResourceStatus and the corresponding accessor methods
(HADOOP-4035)
Version 22: Replaced parameter 'initialContact' with 'restarted'
in heartbeat method (HADOOP-4305)
Version 23: Added parameter 'initialContact' again in heartbeat method
(HADOOP-4869)
Version 24: Changed format of Task and TaskStatus for HADOOP-4759
Version 25: JobIDs are passed in response to JobTracker restart
public static final int TRACKERS_OK
public static final int UNKNOWN_TASKTRACKER
public static final long versionID
Method Detail |
---|
public static JobTracker startTracker(JobConf conf) throws IOException, InterruptedException
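Start the JobTracker with given configuration. The conf will be modified to reflect the actual ports on which the JobTracker is up and running if the user passes the port as zero.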
conf
- configuration for the JobTracker.
IOException
InterruptedException
public static JobTracker startTracker(JobConf conf, String identifier) throws IOException, InterruptedException
IOException
InterruptedException
public void stopTracker() throws IOException
IOException
public long getProtocolVersion(String protocol, long clientVersion) throws IOException
VersionedProtocol
getProtocolVersion
in interface VersionedProtocol
protocol
- The classname of the protocol interface
clientVersion
- The version of the protocol that the client speaks
IOException
public boolean hasRestarted()
public boolean hasRecovered()
public long getRecoveryDuration()
public static Class<? extends org.apache.hadoop.mapred.JobTrackerInstrumentation> getInstrumentationClass(Configuration conf)
public static void setInstrumentationClass(Configuration conf, Class<? extends org.apache.hadoop.mapred.JobTrackerInstrumentation> t)
public static InetSocketAddress getAddress(Configuration conf)
public void offerService() throws InterruptedException, IOException
InterruptedException
IOException
public int getTotalSubmissions()
public String getJobTrackerMachine()
public String getTrackerIdentifier()
public int getTrackerPort()
public int getInfoPort()
public long getStartTime()
public Vector<org.apache.hadoop.mapred.JobInProgress> runningJobs()
public List<org.apache.hadoop.mapred.JobInProgress> getRunningJobs()
public Vector<org.apache.hadoop.mapred.JobInProgress> failedJobs()
public Vector<org.apache.hadoop.mapred.JobInProgress> completedJobs()
public Collection<org.apache.hadoop.mapred.TaskTrackerStatus> taskTrackers()
Collection
of TaskTrackerStatus
public Collection<org.apache.hadoop.mapred.TaskTrackerStatus> activeTaskTrackers()
Collection
of active TaskTrackerStatus
public List<List<String>> taskTrackerNames()
public Collection<org.apache.hadoop.mapred.TaskTrackerStatus> blacklistedTaskTrackers()
Collection
of blacklisted TaskTrackerStatus
public boolean isBlacklisted(String trackerID)
trackerID
-
public org.apache.hadoop.mapred.TaskTrackerStatus getTaskTracker(String trackerID)
public Node resolveAndAddToTopology(String name)
public Collection<Node> getNodesAtMaxLevel()
public static Node getParentNode(Node node, int level)
public Node getNode(String name)
public int getNumTaskCacheLevels()
public int getNumResolvedTaskTrackers()
public int getNumberOfUniqueHosts()
public void addJobInProgressListener(org.apache.hadoop.mapred.JobInProgressListener listener)
JobInProgressListener
for updates from this
TaskTrackerManager
.
public void removeJobInProgressListener(org.apache.hadoop.mapred.JobInProgressListener listener)
JobInProgressListener
from this
TaskTrackerManager
.
public org.apache.hadoop.mapred.QueueManager getQueueManager()
QueueManager
associated with the JobTracker.
QueueManager
public String getBuildVersion() throws IOException
IOException
public org.apache.hadoop.mapred.HeartbeatResponse heartbeat(org.apache.hadoop.mapred.TaskTrackerStatus status, boolean restarted, boolean initialContact, boolean acceptNewTasks, short responseId) throws IOException
TaskTracker
and
the JobTracker
.
The JobTracker
processes the status information sent by the
TaskTracker
and responds with instructions to start/stop
tasks or jobs, and also 'reset' instructions during contingencies.
status
- the status update
restarted
- true if the process has just started or restarted, false otherwise
initialContact
- true if this is first interaction since 'refresh', false otherwise.
acceptNewTasks
- true if the TaskTracker is ready to accept new tasks to run.
responseId
- the last responseId successfully acted upon by the TaskTracker.
HeartbeatResponse
with
fresh instructions.
IOException
public int getNextHeartbeatInterval()
public String getFilesystemName() throws IOException
IOException
public void reportTaskTrackerError(String taskTracker, String errorClass, String errorMessage) throws IOException
taskTracker
- the name of the task tracker
errorClass
- the kind of error (e.g. the class that was thrown)
errorMessage
- the human readable error message
IOException
- if there was a problem in communication or on the remote side
public JobID getNewJobId() throws IOException
IOException
public JobStatus submitJob(JobID jobId) throws IOException
IOException
@Deprecated public ClusterStatus getClusterStatus()
getClusterStatus(boolean)
public ClusterStatus getClusterStatus(boolean detailed)
detailed
- if true then report tracker names as well
public void killJob(JobID jobid) throws IOException
IOException
public void initJob(org.apache.hadoop.mapred.JobInProgress job)
job
- JobInProgress object
public void failJob(org.apache.hadoop.mapred.JobInProgress job)
job
- JobInProgress object
public void setJobPriority(JobID jobid, String priority) throws IOException
jobid
- id of the job
priority
- new priority of the job
IOException
public JobProfile getJobProfile(JobID jobid)
public JobStatus getJobStatus(JobID jobid)
public Counters getJobCounters(JobID jobid)
public TaskReport[] getMapTaskReports(JobID jobid)
public TaskReport[] getReduceTaskReports(JobID jobid)
public TaskReport[] getCleanupTaskReports(JobID jobid)
public TaskReport[] getSetupTaskReports(JobID jobid)
public TaskCompletionEvent[] getTaskCompletionEvents(JobID jobid, int fromEventId, int maxEvents) throws IOException
jobid
- job id
fromEventId
- event id to start from.
maxEvents
- the max number of events we want to look at
IOException
public String[] getTaskDiagnostics(TaskAttemptID taskId) throws IOException
taskId
- the id of the task
IOException
public org.apache.hadoop.mapred.TaskInProgress getTip(TaskID tipid)
public boolean killTask(TaskAttemptID taskid, boolean shouldFail) throws IOException
taskid
- the id of the task to kill.
shouldFail
- if true the task is failed and added to failed tasks list, otherwise it is just killed, without affecting job failure status.
IOException
public String getAssignedTracker(TaskAttemptID taskId)
taskId
- the name of the task
public JobStatus[] jobsToComplete()
public JobStatus[] getAllJobs()
public String getSystemDir()
JobSubmissionProtocol.getSystemDir()
public org.apache.hadoop.mapred.JobInProgress getJob(JobID jobid)
public static String getLocalJobFilePath(JobID jobId)
jobId
- id of the job
public static void main(String[] argv) throws IOException, InterruptedException
IOException
InterruptedException
public JobQueueInfo[] getQueues() throws IOException
IOException
public JobQueueInfo getQueueInfo(String queue) throws IOException
queue
- Queue Name
IOException
public JobStatus[] getJobsFromQueue(String queue) throws IOException
queue
- Queue name
IOException
public void refreshServiceAcl() throws IOException
RefreshAuthorizationPolicyProtocol
refreshServiceAcl
in interface RefreshAuthorizationPolicyProtocol
IOException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |