|
7 | 7 | package org.elasticsearch.xpack.ml.job.process.autodetect;
|
8 | 8 |
|
9 | 9 | import org.elasticsearch.ElasticsearchException;
|
| 10 | +import org.elasticsearch.ResourceNotFoundException; |
10 | 11 | import org.elasticsearch.action.ActionListener;
|
11 | 12 | import org.elasticsearch.action.ActionType;
|
12 | 13 | import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
|
|
33 | 34 | import org.elasticsearch.index.analysis.AnalysisRegistry;
|
34 | 35 | import org.elasticsearch.indices.TestIndexNameExpressionResolver;
|
35 | 36 | import org.elasticsearch.license.XPackLicenseState;
|
| 37 | +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; |
36 | 38 | import org.elasticsearch.persistent.PersistentTasksService;
|
37 | 39 | import org.elasticsearch.tasks.TaskId;
|
38 | 40 | import org.elasticsearch.tasks.TaskManager;
|
|
91 | 93 | import java.util.Optional;
|
92 | 94 | import java.util.concurrent.Callable;
|
93 | 95 | import java.util.concurrent.CountDownLatch;
|
| 96 | +import java.util.concurrent.Executor; |
94 | 97 | import java.util.concurrent.ExecutorService;
|
95 | 98 | import java.util.concurrent.Future;
|
96 | 99 | import java.util.concurrent.TimeUnit;
|
@@ -253,6 +256,9 @@ public void setup() throws Exception {
|
253 | 256 | handler.accept(buildAutodetectParams());
|
254 | 257 | return null;
|
255 | 258 | }).when(jobResultsProvider).getAutodetectParams(any(), any(), any());
|
| 259 | + |
| 260 | + // when running retry logic use the real executor service |
| 261 | + when(threadPool.generic()).thenReturn(EsExecutors.DIRECT_EXECUTOR_SERVICE); |
256 | 262 | }
|
257 | 263 |
|
258 | 264 | public void testOpenJob() {
|
@@ -845,6 +851,141 @@ public void testGetOpenProcessMemoryUsage() {
|
845 | 851 | assertThat(manager.getOpenProcessMemoryUsage(), equalTo(ByteSizeValue.ofBytes(expectedSizeBytes)));
|
846 | 852 | }
|
847 | 853 |
|
| 854 | + public void testSetJobState_withoutHandler_invokesPersistentTaskUpdate() { |
| 855 | + AutodetectProcessManager manager = createSpyManager(); |
| 856 | + JobTask jobTask = mock(JobTask.class); |
| 857 | + when(jobTask.getAllocationId()).thenReturn(123L); |
| 858 | + when(jobTask.getJobId()).thenReturn("job-123"); |
| 859 | + |
| 860 | + // call the no-handler overload |
| 861 | + manager.setJobState(jobTask, JobState.CLOSING, "closing-reason"); |
| 862 | + |
| 863 | + // verify we called updatePersistentTaskState with the expected state |
| 864 | + @SuppressWarnings("unchecked") |
| 865 | + ArgumentCaptor<JobTaskState> stateCaptor = ArgumentCaptor.forClass(JobTaskState.class); |
| 866 | + verify(jobTask).updatePersistentTaskState(stateCaptor.capture(), any()); |
| 867 | + JobTaskState captured = stateCaptor.getValue(); |
| 868 | + assertEquals(JobState.CLOSING, captured.getState()); |
| 869 | + assertEquals(123L, captured.getAllocationId()); |
| 870 | + assertEquals("closing-reason", captured.getReason()); |
| 871 | + } |
| 872 | + |
| 873 | + public void testSetJobState_withHandler_onResponse_triggersHandlerNull() throws IOException { |
| 874 | + // This test verifies the “happy‐path” of the retryable overload—i.e. what happens when the very first call |
| 875 | + // to updatePersistentTaskState succeeds. On a successful state update it must invoke handler.accept(null) |
| 876 | + // (because there was no error). |
| 877 | + AutodetectProcessManager manager = createSpyManager(); |
| 878 | + JobTask jobTask = mock(JobTask.class); |
| 879 | + |
| 880 | + // stub updatePersistentTaskState to call onResponse |
| 881 | + doAnswer(invocation -> { |
| 882 | + @SuppressWarnings("unchecked") |
| 883 | + ActionListener<PersistentTasksCustomMetadata.PersistentTask<?>> listener = (ActionListener< |
| 884 | + PersistentTasksCustomMetadata.PersistentTask<?>>) invocation.getArguments()[1]; |
| 885 | + listener.onResponse(null); |
| 886 | + return null; |
| 887 | + }).when(jobTask).updatePersistentTaskState(any(), any()); |
| 888 | + |
| 889 | + AtomicReference<Exception> holder = new AtomicReference<>(); |
| 890 | + CheckedConsumer<Exception, IOException> handler = holder::set; |
| 891 | + |
| 892 | + manager.setJobState(jobTask, JobState.FAILED, "fail-reason", handler); |
| 893 | + |
| 894 | + // onResponse should have driven handler.accept(null) |
| 895 | + assertNull(holder.get()); |
| 896 | + verify(jobTask).updatePersistentTaskState(any(JobTaskState.class), any()); |
| 897 | + } |
| 898 | + |
| 899 | + public void testSetJobState_withHandler_onFailure_triggersHandlerException() throws IOException { |
| 900 | + // Verifies that when updatePersistentTaskState reports a failure, the handler receives that exception |
| 901 | + when(threadPool.schedule(any(Runnable.class), any(TimeValue.class), any(Executor.class))) |
| 902 | + .thenAnswer(invocation -> { |
| 903 | + Runnable r = invocation.getArgument(0); |
| 904 | + r.run(); |
| 905 | + return mock(ThreadPool.Cancellable.class); |
| 906 | + }); |
| 907 | + AutodetectProcessManager manager = createSpyManager(); |
| 908 | + JobTask jobTask = mock(JobTask.class); |
| 909 | + ResourceNotFoundException boom = new ResourceNotFoundException("boom"); |
| 910 | + doAnswer(invocation -> { |
| 911 | + @SuppressWarnings("unchecked") |
| 912 | + ActionListener<PersistentTasksCustomMetadata.PersistentTask<?>> listener = |
| 913 | + (ActionListener<PersistentTasksCustomMetadata.PersistentTask<?>>) invocation.getArguments()[1]; |
| 914 | + listener.onFailure(boom); |
| 915 | + return null; |
| 916 | + }).when(jobTask).updatePersistentTaskState(any(), any()); |
| 917 | + |
| 918 | + AtomicReference<Exception> holder = new AtomicReference<>(); |
| 919 | + CheckedConsumer<Exception, IOException> handler = holder::set; |
| 920 | + |
| 921 | + manager.setJobState(jobTask, JobState.FAILED, "fail-reason", handler); |
| 922 | + |
| 923 | + // onFailure should have driven handler.accept(boom) |
| 924 | + assertSame(boom, holder.get()); |
| 925 | + verify(jobTask).updatePersistentTaskState(any(JobTaskState.class), any()); |
| 926 | + } |
| 927 | + |
| 928 | + public void testSetJobState_withHandler_retriesUntilSuccess() throws IOException { |
| 929 | + // Verifies that transient failures are retried until eventual success, and the handler receives null on success |
| 930 | + |
| 931 | + // ensure that all retries are executed on the same thread for determinism |
| 932 | + when(threadPool.schedule(any(Runnable.class), any(TimeValue.class), any(Executor.class))).thenAnswer(invocation -> { |
| 933 | + Runnable r = invocation.getArgument(0); |
| 934 | + r.run(); |
| 935 | + return mock(ThreadPool.Cancellable.class); |
| 936 | + }); |
| 937 | + AutodetectProcessManager manager = createSpyManager(); |
| 938 | + JobTask jobTask = mock(JobTask.class); |
| 939 | + AtomicInteger attempts = new AtomicInteger(); |
| 940 | + doAnswer(invocation -> { |
| 941 | + // Simulate transient failures for the first two attempts, then succeed on the third |
| 942 | + @SuppressWarnings("unchecked") |
| 943 | + ActionListener<PersistentTasksCustomMetadata.PersistentTask<?>> listener = (ActionListener< |
| 944 | + PersistentTasksCustomMetadata.PersistentTask<?>>) invocation.getArguments()[1]; |
| 945 | + if (attempts.incrementAndGet() < 3) { |
| 946 | + listener.onFailure(new RuntimeException("transient failure")); |
| 947 | + } else { |
| 948 | + listener.onResponse(null); |
| 949 | + } |
| 950 | + return null; |
| 951 | + }).when(jobTask).updatePersistentTaskState(any(), any()); |
| 952 | + |
| 953 | + AtomicReference<Exception> holder = new AtomicReference<>(); |
| 954 | + CheckedConsumer<Exception, IOException> handler = holder::set; |
| 955 | + |
| 956 | + manager.setJobState(jobTask, JobState.OPENED, "retry-test", handler); |
| 957 | + |
| 958 | + // confirms that the method was called exactly three times (two failures then one success). |
| 959 | + verify(jobTask, times(3)).updatePersistentTaskState(any(JobTaskState.class), any()); |
| 960 | + assertNull(holder.get()); |
| 961 | + } |
| 962 | + |
| 963 | + public void testSetJobState_withHandler_noRetryOnResourceNotFound() throws IOException { |
| 964 | + // Ensures that if the persistent‐state update fails with a ResourceNotFoundException, the retry loop does not retry |
| 965 | + // again but immediately invokes the user’s handler with that exception. |
| 966 | + AutodetectProcessManager manager = createSpyManager(); |
| 967 | + JobTask jobTask = mock(JobTask.class); |
| 968 | + ResourceNotFoundException rnfe = new ResourceNotFoundException("not found"); |
| 969 | + doAnswer(invocation -> { |
| 970 | + // Simulate a ResourceNotFoundException that should not be retried |
| 971 | + @SuppressWarnings("unchecked") |
| 972 | + ActionListener<PersistentTasksCustomMetadata.PersistentTask<?>> listener = (ActionListener< |
| 973 | + PersistentTasksCustomMetadata.PersistentTask<?>>) invocation.getArguments()[1]; |
| 974 | + listener.onFailure(rnfe); |
| 975 | + return null; |
| 976 | + }).when(jobTask).updatePersistentTaskState(any(), any()); |
| 977 | + |
| 978 | + AtomicReference<Exception> holder = new AtomicReference<>(); |
| 979 | + CheckedConsumer<Exception, IOException> handler = holder::set; |
| 980 | + |
| 981 | + manager.setJobState(jobTask, JobState.OPENED, "rnfe-test", handler); |
| 982 | + |
| 983 | + // updatePersistentTaskState(...) was invoked exactly once (no retries). |
| 984 | + verify(jobTask, times(1)).updatePersistentTaskState(any(JobTaskState.class), any()); |
| 985 | + // The handler should have been invoked with the ResourceNotFoundException |
| 986 | + assertSame(rnfe, holder.get()); |
| 987 | + } |
| 988 | + |
848 | 989 | private AutodetectProcessManager createNonSpyManager(String jobId) {
|
849 | 990 | ExecutorService executorService = mock(ExecutorService.class);
|
850 | 991 | when(threadPool.executor(anyString())).thenReturn(executorService);
|
|
0 commit comments