public class TaskSchedulerImpl extends Object implements TaskScheduler, Logging
Clients should first call initialize() and start(), then submit task sets through the submitTasks method.
THREADING: SchedulerBackends and task-submitting clients can call this class from multiple threads, so it needs locks in public API methods to maintain its state. In addition, some SchedulerBackends synchronize on themselves when they want to send events here, and then acquire a lock on us, so we need to make sure that we don't try to lock the backend while we are holding a lock on ourselves.
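A minimal lifecycle sketch in Scala (illustrative only: the LocalBackend wiring and its constructor arguments are assumptions here, and in a real application SparkContext constructs and owns the scheduler itself):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.scheduler.TaskSchedulerImpl
import org.apache.spark.scheduler.local.LocalBackend

// Hypothetical wiring; normally SparkContext does this internally when it starts.
val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("demo"))
val scheduler = new TaskSchedulerImpl(sc)
val backend = new LocalBackend(scheduler, /* totalCores = */ 1) // assumed constructor

scheduler.initialize(backend) // must come before start()
scheduler.start()
// scheduler.submitTasks(taskSet) -- task sets normally arrive from the DAGScheduler
scheduler.stop()
```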
| Constructor and Description |
|---|
| `TaskSchedulerImpl(SparkContext sc)` |
| `TaskSchedulerImpl(SparkContext sc, int maxTaskFailures, boolean isLocal)` |
| Modifier and Type | Method and Description |
|---|---|
| `scala.collection.mutable.HashSet<String>` | `activeExecutorIds()` |
| `scala.collection.mutable.HashMap<String,TaskSetManager>` | `activeTaskSets()` |
| `String` | `applicationId()` Get an application ID associated with the job. |
| `SchedulerBackend` | `backend()` |
| `void` | `cancelTasks(int stageId, boolean interruptThread)` |
| `void` | `checkSpeculatableTasks()` |
| `SparkConf` | `conf()` |
| `int` | `CPUS_PER_TASK()` |
| `TaskSetManager` | `createTaskSetManager(TaskSet taskSet, int maxTaskFailures)` |
| `DAGScheduler` | `dagScheduler()` |
| `int` | `defaultParallelism()` |
| `void` | `error(String message)` |
| `void` | `executorAdded(String execId, String host)` |
| `boolean` | `executorHeartbeatReceived(String execId, scala.Tuple2<Object,org.apache.spark.executor.TaskMetrics>[] taskMetrics, BlockManagerId blockManagerId)` Update metrics for in-progress tasks and let the master know that the BlockManager is still alive. |
| `void` | `executorLost(String executorId, ExecutorLossReason reason)` |
| `scala.Option<scala.collection.immutable.Set<String>>` | `getExecutorsAliveOnHost(String host)` |
| `scala.Option<String>` | `getRackForHost(String value)` |
| `void` | `handleFailedTask(TaskSetManager taskSetManager, long tid, scala.Enumeration.Value taskState, TaskEndReason reason)` |
| `void` | `handleSuccessfulTask(TaskSetManager taskSetManager, long tid, DirectTaskResult<?> taskResult)` |
| `void` | `handleTaskGettingResult(TaskSetManager taskSetManager, long tid)` |
| `boolean` | `hasExecutorsAliveOnHost(String host)` |
| `boolean` | `hasHostAliveOnRack(String rack)` |
| `void` | `initialize(SchedulerBackend backend)` |
| `boolean` | `isExecutorAlive(String execId)` |
| `MapOutputTracker` | `mapOutputTracker()` |
| `int` | `maxTaskFailures()` |
| `long` | `newTaskId()` |
| `java.util.concurrent.atomic.AtomicLong` | `nextTaskId()` |
| `void` | `postStartHook()` |
| `static <K,T> scala.collection.immutable.List<T>` | `prioritizeContainers(scala.collection.mutable.HashMap<K,scala.collection.mutable.ArrayBuffer<T>> map)` Used to balance containers across hosts. |
| `scala.collection.Seq<scala.collection.Seq<TaskDescription>>` | `resourceOffers(scala.collection.Seq<WorkerOffer> offers)` Called by cluster manager to offer resources on slaves. |
| `Pool` | `rootPool()` |
| `SparkContext` | `sc()` |
| `SchedulableBuilder` | `schedulableBuilder()` |
| `scala.Enumeration.Value` | `schedulingMode()` |
| `void` | `setDAGScheduler(DAGScheduler dagScheduler)` |
| `long` | `SPECULATION_INTERVAL()` |
| `void` | `start()` |
| `long` | `STARVATION_TIMEOUT()` |
| `void` | `statusUpdate(long tid, scala.Enumeration.Value state, java.nio.ByteBuffer serializedData)` |
| `void` | `stop()` |
| `void` | `submitTasks(TaskSet taskSet)` |
| `scala.collection.mutable.HashMap<Object,String>` | `taskIdToExecutorId()` |
| `scala.collection.mutable.HashMap<Object,String>` | `taskIdToTaskSetId()` |
| `TaskResultGetter` | `taskResultGetter()` |
| `void` | `taskSetFinished(TaskSetManager manager)` Called to indicate that all task attempts (including speculated tasks) associated with the given TaskSetManager have completed, so state associated with the TaskSetManager should be cleaned up. |
Methods inherited from class java.lang.Object: equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface org.apache.spark.scheduler.TaskScheduler: appId
Methods inherited from interface org.apache.spark.Logging: initializeIfNecessary, initializeLogging, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning
public TaskSchedulerImpl(SparkContext sc, int maxTaskFailures, boolean isLocal)
public TaskSchedulerImpl(SparkContext sc)
public static <K,T> scala.collection.immutable.List<T> prioritizeContainers(scala.collection.mutable.HashMap<K,scala.collection.mutable.ArrayBuffer<T>> map)
Accepts a map of hosts to resource offers for that host, and returns a prioritized list of resource offers representing the order in which the offers should be used. The resource offers are ordered such that we'll allocate one container on each host before allocating a second container on any host, and so on, in order to reduce the damage if a host fails.
For example, given <h1, [o1, o2, o3]>, <h2, [o4]>, <h3, [o5, o6]>, returns [o1, o5, o4, o2, o6, o3].
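The ordering is a plain round-robin over hosts. A standalone sketch of the idea (an illustrative reimplementation, not the class's actual code):

```scala
import scala.collection.mutable.{ArrayBuffer, HashMap}

// Take the i-th offer from every host's queue before taking the (i+1)-th
// from any host, so containers spread evenly across hosts.
def roundRobin[K, T](map: HashMap[K, ArrayBuffer[T]]): List[T] = {
  val queues = map.values.toSeq
  val longest = if (queues.isEmpty) 0 else queues.map(_.size).max
  val out = new ArrayBuffer[T]
  for (i <- 0 until longest; q <- queues if i < q.size) out += q(i)
  out.toList
}

// roundRobin(HashMap("h1" -> ArrayBuffer(1, 2, 3), "h2" -> ArrayBuffer(4)))
// yields List(1, 4, 2, 3) (up to HashMap iteration order within each pass)
```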
public SparkContext sc()
public int maxTaskFailures()
public SparkConf conf()
public long SPECULATION_INTERVAL()
public long STARVATION_TIMEOUT()
public int CPUS_PER_TASK()
public scala.collection.mutable.HashMap<String,TaskSetManager> activeTaskSets()
public scala.collection.mutable.HashMap<Object,String> taskIdToTaskSetId()
public scala.collection.mutable.HashMap<Object,String> taskIdToExecutorId()
public java.util.concurrent.atomic.AtomicLong nextTaskId()
public scala.collection.mutable.HashSet<String> activeExecutorIds()
public DAGScheduler dagScheduler()
public SchedulerBackend backend()
public MapOutputTracker mapOutputTracker()
public SchedulableBuilder schedulableBuilder()
public Pool rootPool()
Specified by: rootPool in interface TaskScheduler
public scala.Enumeration.Value schedulingMode()
Specified by: schedulingMode in interface TaskScheduler
public TaskResultGetter taskResultGetter()
public void setDAGScheduler(DAGScheduler dagScheduler)
Specified by: setDAGScheduler in interface TaskScheduler
public void initialize(SchedulerBackend backend)
public long newTaskId()
public void start()
Specified by: start in interface TaskScheduler
public void postStartHook()
Specified by: postStartHook in interface TaskScheduler
public void submitTasks(TaskSet taskSet)
Specified by: submitTasks in interface TaskScheduler
public TaskSetManager createTaskSetManager(TaskSet taskSet, int maxTaskFailures)
public void cancelTasks(int stageId, boolean interruptThread)
Specified by: cancelTasks in interface TaskScheduler
public void taskSetFinished(TaskSetManager manager)
public scala.collection.Seq<scala.collection.Seq<TaskDescription>> resourceOffers(scala.collection.Seq<WorkerOffer> offers)
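A hedged sketch of how a SchedulerBackend might drive this method, reusing the `scheduler` from the earlier lifecycle sketch (the offers and the launchTask hook are illustrative; real backends build WorkerOffers from their registered executors):

```scala
import org.apache.spark.scheduler.{TaskDescription, WorkerOffer}

// Illustrative only: offer free cores on two executors, then launch
// whatever TaskDescriptions the scheduler hands back.
val offers = Seq(
  WorkerOffer("exec-1", "host-a", 4),
  WorkerOffer("exec-2", "host-b", 4)
)
val tasks: Seq[Seq[TaskDescription]] = scheduler.resourceOffers(offers)
for (taskSeq <- tasks; task <- taskSeq) {
  // launchTask(task) -- hypothetical backend hook that ships the task to its executor
}
```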
public void statusUpdate(long tid, scala.Enumeration.Value state, java.nio.ByteBuffer serializedData)
public boolean executorHeartbeatReceived(String execId, scala.Tuple2<Object,org.apache.spark.executor.TaskMetrics>[] taskMetrics, BlockManagerId blockManagerId)
Specified by: executorHeartbeatReceived in interface TaskScheduler
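In the Scala source the taskMetrics parameter is an Array[(Long, TaskMetrics)]; the Object in the Java signature above is just type erasure of the task ID. A sketch of the driver-side heartbeat path, with the handler itself as an assumption:

```scala
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.BlockManagerId

// Hypothetical heartbeat handler: forward per-task metrics to the scheduler.
// A false return means the scheduler no longer knows this executor, so the
// caller would typically ask its BlockManager to re-register.
def onHeartbeat(execId: String,
                taskMetrics: Array[(Long, TaskMetrics)],
                blockManagerId: BlockManagerId): Boolean =
  scheduler.executorHeartbeatReceived(execId, taskMetrics, blockManagerId)
```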
public void handleTaskGettingResult(TaskSetManager taskSetManager, long tid)
public void handleSuccessfulTask(TaskSetManager taskSetManager, long tid, DirectTaskResult<?> taskResult)
public void handleFailedTask(TaskSetManager taskSetManager, long tid, scala.Enumeration.Value taskState, TaskEndReason reason)
public void error(String message)
public void stop()
Specified by: stop in interface TaskScheduler
public int defaultParallelism()
Specified by: defaultParallelism in interface TaskScheduler
public void checkSpeculatableTasks()
public void executorLost(String executorId, ExecutorLossReason reason)
public void executorAdded(String execId, String host)
public scala.Option<scala.collection.immutable.Set<String>> getExecutorsAliveOnHost(String host)
public boolean hasExecutorsAliveOnHost(String host)
public boolean hasHostAliveOnRack(String rack)
public boolean isExecutorAlive(String execId)
public scala.Option<String> getRackForHost(String value)
public String applicationId()
Description copied from interface: TaskScheduler
Get an application ID associated with the job.
Specified by: applicationId in interface TaskScheduler