|
18 | 18 |
|
19 | 19 | package org.apache.flink.runtime.executiongraph;
|
20 | 20 |
|
21 |
| -import akka.actor.ActorSystem; |
22 | 21 | import org.apache.flink.api.common.ExecutionConfig;
|
23 | 22 | import org.apache.flink.api.common.JobID;
|
24 | 23 | import org.apache.flink.api.common.accumulators.Accumulator;
|
|
41 | 40 | import org.apache.flink.runtime.execution.ExecutionState;
|
42 | 41 | import org.apache.flink.runtime.execution.SuppressRestartsException;
|
43 | 42 | import org.apache.flink.runtime.executiongraph.restart.RestartStrategy;
|
44 |
| -import org.apache.flink.runtime.instance.ActorGateway; |
45 | 43 | import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
|
46 | 44 | import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
|
47 | 45 | import org.apache.flink.runtime.jobgraph.JobStatus;
|
|
50 | 48 | import org.apache.flink.runtime.jobgraph.ScheduleMode;
|
51 | 49 | import org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup;
|
52 | 50 | import org.apache.flink.runtime.jobmanager.scheduler.Scheduler;
|
53 |
| -import org.apache.flink.runtime.messages.ExecutionGraphMessages; |
54 | 51 | import org.apache.flink.runtime.query.KvStateLocationRegistry;
|
55 | 52 | import org.apache.flink.runtime.taskmanager.TaskExecutionState;
|
56 | 53 | import org.apache.flink.runtime.util.SerializableObject;
|
57 | 54 | import org.apache.flink.runtime.util.SerializedThrowable;
|
58 | 55 | import org.apache.flink.util.ExceptionUtils;
|
59 | 56 | import org.apache.flink.util.SerializedValue;
|
| 57 | + |
60 | 58 | import org.slf4j.Logger;
|
61 | 59 | import org.slf4j.LoggerFactory;
|
| 60 | + |
62 | 61 | import scala.concurrent.ExecutionContext;
|
63 | 62 | import scala.concurrent.duration.FiniteDuration;
|
64 | 63 |
|
|
75 | 74 | import java.util.Map;
|
76 | 75 | import java.util.NoSuchElementException;
|
77 | 76 | import java.util.Objects;
|
78 |
| -import java.util.UUID; |
79 | 77 | import java.util.concurrent.ConcurrentHashMap;
|
80 | 78 | import java.util.concurrent.CopyOnWriteArrayList;
|
81 | 79 | import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
|
82 | 80 |
|
83 | 81 | import static org.apache.flink.util.Preconditions.checkNotNull;
|
| 82 | + |
84 | 83 | /**
|
85 | 84 | * The execution graph is the central data structure that coordinates the distributed
|
86 | 85 | * execution of a data flow. It keeps representations of each parallel task, each
|
@@ -151,12 +150,12 @@ public class ExecutionGraph {
|
151 | 150 | * accessible on all nodes in the cluster. */
|
152 | 151 | private final List<URL> requiredClasspaths;
|
153 | 152 |
|
154 |
| - /** Listeners that receive messages when the entire job switches it status (such as from |
155 |
| - * RUNNING to FINISHED) */ |
156 |
| - private final List<ActorGateway> jobStatusListenerActors; |
| 153 | + /** Listeners that receive messages when the entire job switches its status |
| 154 | + * (such as from RUNNING to FINISHED) */ |
| 155 | + private final List<JobStatusListener> jobStatusListeners; |
157 | 156 |
|
158 | 157 | /** Listeners that receive messages whenever a single task execution changes its status */
|
159 |
| - private final List<ActorGateway> executionListenerActors; |
| 158 | + private final List<ExecutionStatusListener> executionListeners; |
160 | 159 |
|
161 | 160 | /** Timestamps (in milliseconds as returned by {@code System.currentTimeMillis()}) when
|
162 | 161 | * the execution graph transitioned into a certain state. The index into this array is the
|
@@ -284,8 +283,8 @@ public ExecutionGraph(
|
284 | 283 | this.verticesInCreationOrder = new ArrayList<ExecutionJobVertex>();
|
285 | 284 | this.currentExecutions = new ConcurrentHashMap<ExecutionAttemptID, Execution>();
|
286 | 285 |
|
287 |
| - this.jobStatusListenerActors = new CopyOnWriteArrayList<ActorGateway>(); |
288 |
| - this.executionListenerActors = new CopyOnWriteArrayList<ActorGateway>(); |
| 286 | + this.jobStatusListeners = new CopyOnWriteArrayList<>(); |
| 287 | + this.executionListeners = new CopyOnWriteArrayList<>(); |
289 | 288 |
|
290 | 289 | this.stateTimestamps = new long[JobStatus.values().length];
|
291 | 290 | this.stateTimestamps[JobStatus.CREATED.ordinal()] = System.currentTimeMillis();
|
@@ -345,8 +344,6 @@ public void enableSnapshotCheckpointing(
|
345 | 344 | List<ExecutionJobVertex> verticesToTrigger,
|
346 | 345 | List<ExecutionJobVertex> verticesToWaitFor,
|
347 | 346 | List<ExecutionJobVertex> verticesToCommitTo,
|
348 |
| - ActorSystem actorSystem, |
349 |
| - UUID leaderSessionID, |
350 | 347 | CheckpointIDCounter checkpointIDCounter,
|
351 | 348 | CompletedCheckpointStore checkpointStore,
|
352 | 349 | SavepointStore savepointStore,
|
@@ -388,8 +385,7 @@ public void enableSnapshotCheckpointing(
|
388 | 385 |
|
389 | 386 | // the periodic checkpoint scheduler is activated and deactivated as a result of
|
390 | 387 | // job status changes (running -> on, all other states -> off)
|
391 |
| - registerJobStatusListener( |
392 |
| - checkpointCoordinator.createActivatorDeactivator(actorSystem, leaderSessionID)); |
| 388 | + registerJobStatusListener(checkpointCoordinator.createActivatorDeactivator()); |
393 | 389 | }
|
394 | 390 |
|
395 | 391 | /**
|
@@ -935,8 +931,8 @@ public void prepareForArchiving() {
|
935 | 931 | intermediateResults.clear();
|
936 | 932 | currentExecutions.clear();
|
937 | 933 | requiredJarFiles.clear();
|
938 |
| - jobStatusListenerActors.clear(); |
939 |
| - executionListenerActors.clear(); |
| 934 | + jobStatusListeners.clear(); |
| 935 | + executionListeners.clear(); |
940 | 936 |
|
941 | 937 | isArchived = true;
|
942 | 938 | }
|
@@ -1173,45 +1169,52 @@ public void updateAccumulators(AccumulatorSnapshot accumulatorSnapshot) {
|
1173 | 1169 | // Listeners & Observers
|
1174 | 1170 | // --------------------------------------------------------------------------------------------
|
1175 | 1171 |
|
1176 |
| - public void registerJobStatusListener(ActorGateway listener) { |
| 1172 | + public void registerJobStatusListener(JobStatusListener listener) { |
1177 | 1173 | if (listener != null) {
|
1178 |
| - this.jobStatusListenerActors.add(listener); |
| 1174 | + jobStatusListeners.add(listener); |
1179 | 1175 | }
|
1180 | 1176 | }
|
1181 | 1177 |
|
1182 |
| - public void registerExecutionListener(ActorGateway listener) { |
| 1178 | + public void registerExecutionListener(ExecutionStatusListener listener) { |
1183 | 1179 | if (listener != null) {
|
1184 |
| - this.executionListenerActors.add(listener); |
| 1180 | + executionListeners.add(listener); |
1185 | 1181 | }
|
1186 | 1182 | }
|
1187 | 1183 |
|
1188 | 1184 | private void notifyJobStatusChange(JobStatus newState, Throwable error) {
|
1189 |
| - if (jobStatusListenerActors.size() > 0) { |
1190 |
| - ExecutionGraphMessages.JobStatusChanged message = |
1191 |
| - new ExecutionGraphMessages.JobStatusChanged(jobID, newState, System.currentTimeMillis(), |
1192 |
| - error == null ? null : new SerializedThrowable(error)); |
1193 |
| - |
1194 |
| - for (ActorGateway listener: jobStatusListenerActors) { |
1195 |
| - listener.tell(message); |
| 1185 | + if (jobStatusListeners.size() > 0) { |
| 1186 | + final long timestamp = System.currentTimeMillis(); |
| 1187 | + final Throwable serializedError = error == null ? null : new SerializedThrowable(error); |
| 1188 | + |
| 1189 | + for (JobStatusListener listener : jobStatusListeners) { |
| 1190 | + try { |
| 1191 | + listener.jobStatusChanges(jobID, newState, timestamp, serializedError); |
| 1192 | + } catch (Throwable t) { |
| 1193 | + LOG.warn("Error while notifying JobStatusListener", t); |
| 1194 | + } |
1196 | 1195 | }
|
1197 | 1196 | }
|
1198 | 1197 | }
|
1199 | 1198 |
|
1200 |
| - void notifyExecutionChange(JobVertexID vertexId, int subtask, ExecutionAttemptID executionID, ExecutionState |
1201 |
| - newExecutionState, Throwable error) |
| 1199 | + void notifyExecutionChange( |
| 1200 | + JobVertexID vertexId, int subtask, ExecutionAttemptID executionID, |
| 1201 | + ExecutionState newExecutionState, Throwable error) |
1202 | 1202 | {
|
1203 | 1203 | ExecutionJobVertex vertex = getJobVertex(vertexId);
|
1204 | 1204 |
|
1205 |
| - if (executionListenerActors.size() > 0) { |
1206 |
| - String message = error == null ? null : ExceptionUtils.stringifyException(error); |
1207 |
| - ExecutionGraphMessages.ExecutionStateChanged actorMessage = |
1208 |
| - new ExecutionGraphMessages.ExecutionStateChanged(jobID, vertexId, vertex.getJobVertex().getName(), |
1209 |
| - vertex.getParallelism(), subtask, |
1210 |
| - executionID, newExecutionState, |
1211 |
| - System.currentTimeMillis(), message); |
1212 |
| - |
1213 |
| - for (ActorGateway listener : executionListenerActors) { |
1214 |
| - listener.tell(actorMessage); |
| 1205 | + if (executionListeners.size() > 0) { |
| 1206 | + final String message = error == null ? null : ExceptionUtils.stringifyException(error); |
| 1207 | + final long timestamp = System.currentTimeMillis(); |
| 1208 | + |
| 1209 | + for (ExecutionStatusListener listener : executionListeners) { |
| 1210 | + try { |
| 1211 | + listener.executionStatusChanged( |
| 1212 | + jobID, vertexId, vertex.getJobVertex().getName(), |
| 1213 | + vertex.getParallelism(), subtask, executionID, newExecutionState, |
| 1214 | + timestamp, message); |
| 1215 | + } catch (Throwable t) { |
| 1216 | + LOG.warn("Error while notifying ExecutionStatusListener", t); |
| 1217 | + } |
1215 | 1218 | }
|
1216 | 1219 | }
|
1217 | 1220 |
|
|
0 commit comments