Skip to content

Commit 17a02be

Browse files
authored
Always log data node failures (#127420) (#127512)
Log search exceptions as they occur on the data node no matter the value of error_trace. (cherry picked from commit 0538958) # Conflicts: # qa/smoke-test-http/src/internalClusterTest/java/org/elasticsearch/http/SearchErrorTraceIT.java # server/src/main/java/org/elasticsearch/search/SearchService.java # server/src/test/java/org/elasticsearch/search/SearchServiceTests.java # x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchErrorTraceIT.java
1 parent bfb217a commit 17a02be

File tree

5 files changed

+73
-172
lines changed

5 files changed

+73
-172
lines changed

qa/smoke-test-http/src/internalClusterTest/java/org/elasticsearch/http/SearchErrorTraceIT.java

Lines changed: 17 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -130,34 +130,7 @@ public void testSearchFailingQueryErrorTraceFalse() throws IOException {
130130
assertFalse(hasStackTrace.get());
131131
}
132132

133-
public void testDataNodeDoesNotLogStackTraceWhenErrorTraceTrue() throws IOException {
134-
hasStackTrace = new AtomicBoolean();
135-
setupIndexWithDocs();
136-
137-
Request searchRequest = new Request("POST", "/_search");
138-
searchRequest.setJsonEntity("""
139-
{
140-
"query": {
141-
"simple_query_string" : {
142-
"query": "foo",
143-
"fields": ["field"]
144-
}
145-
}
146-
}
147-
""");
148-
149-
String errorTriggeringIndex = "test2";
150-
int numShards = getNumShards(errorTriggeringIndex).numPrimaries;
151-
try (var mockLog = MockLog.capture(SearchService.class)) {
152-
ErrorTraceHelper.addUnseenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
153-
154-
searchRequest.addParameter("error_trace", "true");
155-
getRestClient().performRequest(searchRequest);
156-
mockLog.assertAllExpectationsMatched();
157-
}
158-
}
159-
160-
public void testDataNodeLogsStackTraceWhenErrorTraceFalseOrEmpty() throws IOException {
133+
public void testDataNodeLogsStackTrace() throws IOException {
161134
hasStackTrace = new AtomicBoolean();
162135
setupIndexWithDocs();
163136

@@ -178,10 +151,14 @@ public void testDataNodeLogsStackTraceWhenErrorTraceFalseOrEmpty() throws IOExce
178151
try (var mockLog = MockLog.capture(SearchService.class)) {
179152
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
180153

181-
// error_trace defaults to false so we can test both cases with some randomization
182-
if (randomBoolean()) {
154+
// No matter the value of error_trace (empty, true, or false) we should see stack traces logged
155+
int errorTraceValue = randomIntBetween(0, 2);
156+
if (errorTraceValue == 0) {
157+
searchRequest.addParameter("error_trace", "true");
158+
} else if (errorTraceValue == 1) {
183159
searchRequest.addParameter("error_trace", "false");
184-
}
160+
} // else empty
161+
185162
getRestClient().performRequest(searchRequest);
186163
mockLog.assertAllExpectationsMatched();
187164
}
@@ -241,33 +218,7 @@ public void testMultiSearchFailingQueryErrorTraceFalse() throws IOException {
241218
assertFalse(hasStackTrace.get());
242219
}
243220

244-
public void testDataNodeDoesNotLogStackTraceWhenErrorTraceTrueMultiSearch() throws IOException {
245-
hasStackTrace = new AtomicBoolean();
246-
setupIndexWithDocs();
247-
248-
XContentType contentType = XContentType.JSON;
249-
MultiSearchRequest multiSearchRequest = new MultiSearchRequest().add(
250-
new SearchRequest("test*").source(new SearchSourceBuilder().query(simpleQueryStringQuery("foo").field("field")))
251-
);
252-
Request searchRequest = new Request("POST", "/_msearch");
253-
byte[] requestBody = MultiSearchRequest.writeMultiLineFormat(multiSearchRequest, contentType.xContent());
254-
searchRequest.setEntity(
255-
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
256-
);
257-
258-
searchRequest.addParameter("error_trace", "true");
259-
260-
String errorTriggeringIndex = "test2";
261-
int numShards = getNumShards(errorTriggeringIndex).numPrimaries;
262-
try (var mockLog = MockLog.capture(SearchService.class)) {
263-
ErrorTraceHelper.addUnseenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
264-
265-
getRestClient().performRequest(searchRequest);
266-
mockLog.assertAllExpectationsMatched();
267-
}
268-
}
269-
270-
public void testDataNodeLogsStackTraceWhenErrorTraceFalseOrEmptyMultiSearch() throws IOException {
221+
public void testDataNodeLogsStackTraceMultiSearch() throws IOException {
271222
hasStackTrace = new AtomicBoolean();
272223
setupIndexWithDocs();
273224

@@ -281,16 +232,19 @@ public void testDataNodeLogsStackTraceWhenErrorTraceFalseOrEmptyMultiSearch() th
281232
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
282233
);
283234

284-
// error_trace defaults to false so we can test both cases with some randomization
285-
if (randomBoolean()) {
286-
searchRequest.addParameter("error_trace", "false");
287-
}
288-
289235
String errorTriggeringIndex = "test2";
290236
int numShards = getNumShards(errorTriggeringIndex).numPrimaries;
291237
try (var mockLog = MockLog.capture(SearchService.class)) {
292238
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
293239

240+
// No matter the value of error_trace (empty, true, or false) we should see stack traces logged
241+
int errorTraceValue = randomIntBetween(0, 2);
242+
if (errorTraceValue == 0) {
243+
searchRequest.addParameter("error_trace", "true");
244+
} else if (errorTraceValue == 1) {
245+
searchRequest.addParameter("error_trace", "false");
246+
} // else empty
247+
294248
getRestClient().performRequest(searchRequest);
295249
mockLog.assertAllExpectationsMatched();
296250
}

server/src/main/java/org/elasticsearch/search/SearchService.java

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -522,10 +522,11 @@ protected void doClose() {
522522
}
523523

524524
/**
525-
* Wraps the listener to avoid sending StackTraces back to the coordinating
526-
* node if the `error_trace` header is set to {@code false}. Upon reading we
527-
* default to {@code true} to maintain the same behavior as before the change,
528-
* due to older nodes not being able to specify whether it needs stack traces.
525+
* Wraps the listener to ensure errors are logged and to avoid sending
526+
* StackTraces back to the coordinating node if the `error_trace` header is
527+
* set to {@code false}. Upon reading, we default to {@code true} to maintain
528+
* the same behavior as before the change, due to older nodes not being able
529+
* to specify whether they need stack traces.
529530
*
530531
* @param <T> the type of the response
531532
* @param listener the action listener to be wrapped
@@ -536,44 +537,47 @@ protected void doClose() {
536537
* @param threadPool with context where to write the new header
537538
* @return the wrapped action listener
538539
*/
539-
static <T> ActionListener<T> maybeWrapListenerForStackTrace(
540+
static <T> ActionListener<T> wrapListenerForErrorHandling(
540541
ActionListener<T> listener,
541542
TransportVersion version,
542543
String nodeId,
543544
ShardId shardId,
544545
long taskId,
545546
ThreadPool threadPool
546547
) {
547-
boolean header = true;
548+
final boolean header;
548549
if (version.onOrAfter(ERROR_TRACE_IN_TRANSPORT_HEADER) && threadPool.getThreadContext() != null) {
549550
header = Boolean.parseBoolean(threadPool.getThreadContext().getHeaderOrDefault("error_trace", "false"));
550-
}
551-
if (header == false) {
552-
return listener.delegateResponse((l, e) -> {
553-
org.apache.logging.log4j.util.Supplier<String> messageSupplier = () -> format(
554-
"[%s]%s: failed to execute search request for task [%d]",
555-
nodeId,
556-
shardId,
557-
taskId
558-
);
559-
// Keep this logic aligned with that of SUPPRESSED_ERROR_LOGGER in RestResponse
560-
if (ExceptionsHelper.status(e).getStatus() < 500 || ExceptionsHelper.isNodeOrShardUnavailableTypeException(e)) {
561-
logger.debug(messageSupplier, e);
562-
} else {
563-
logger.warn(messageSupplier, e);
564-
}
551+
} else {
552+
header = true;
553+
}
554+
return listener.delegateResponse((l, e) -> {
555+
org.apache.logging.log4j.util.Supplier<String> messageSupplier = () -> format(
556+
"[%s]%s: failed to execute search request for task [%d]",
557+
nodeId,
558+
shardId,
559+
taskId
560+
);
561+
// Keep this logic aligned with that of SUPPRESSED_ERROR_LOGGER in RestResponse
562+
if (ExceptionsHelper.status(e).getStatus() < 500 || ExceptionsHelper.isNodeOrShardUnavailableTypeException(e)) {
563+
logger.debug(messageSupplier, e);
564+
} else {
565+
logger.warn(messageSupplier, e);
566+
}
567+
568+
if (header == false) {
565569
ExceptionsHelper.unwrapCausesAndSuppressed(e, err -> {
566570
err.setStackTrace(EMPTY_STACK_TRACE_ARRAY);
567571
return false;
568572
});
569-
l.onFailure(e);
570-
});
571-
}
572-
return listener;
573+
}
574+
575+
l.onFailure(e);
576+
});
573577
}
574578

575579
public void executeDfsPhase(ShardSearchRequest request, SearchShardTask task, ActionListener<SearchPhaseResult> listener) {
576-
listener = maybeWrapListenerForStackTrace(
580+
listener = wrapListenerForErrorHandling(
577581
listener,
578582
request.getChannelVersion(),
579583
clusterService.localNode().getId(),
@@ -618,7 +622,7 @@ private void loadOrExecuteQueryPhase(final ShardSearchRequest request, final Sea
618622
}
619623

620624
public void executeQueryPhase(ShardSearchRequest request, SearchShardTask task, ActionListener<SearchPhaseResult> listener) {
621-
ActionListener<SearchPhaseResult> finalListener = maybeWrapListenerForStackTrace(
625+
ActionListener<SearchPhaseResult> finalListener = wrapListenerForErrorHandling(
622626
listener,
623627
request.getChannelVersion(),
624628
clusterService.localNode().getId(),
@@ -820,7 +824,7 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh
820824
public void executeRankFeaturePhase(RankFeatureShardRequest request, SearchShardTask task, ActionListener<RankFeatureResult> listener) {
821825
final ReaderContext readerContext = findReaderContext(request.contextId(), request);
822826
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.getShardSearchRequest());
823-
listener = maybeWrapListenerForStackTrace(
827+
listener = wrapListenerForErrorHandling(
824828
listener,
825829
shardSearchRequest.getChannelVersion(),
826830
clusterService.localNode().getId(),
@@ -873,7 +877,7 @@ public void executeQueryPhase(
873877
TransportVersion version
874878
) {
875879
final LegacyReaderContext readerContext = (LegacyReaderContext) findReaderContext(request.contextId(), request);
876-
listener = maybeWrapListenerForStackTrace(
880+
listener = wrapListenerForErrorHandling(
877881
listener,
878882
version,
879883
clusterService.localNode().getId(),
@@ -923,7 +927,7 @@ public void executeQueryPhase(
923927
) {
924928
final ReaderContext readerContext = findReaderContext(request.contextId(), request.shardSearchRequest());
925929
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.shardSearchRequest());
926-
listener = maybeWrapListenerForStackTrace(
930+
listener = wrapListenerForErrorHandling(
927931
listener,
928932
version,
929933
clusterService.localNode().getId(),

server/src/test/java/org/elasticsearch/search/SearchServiceTests.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
import java.util.function.Predicate;
6464

6565
import static org.elasticsearch.common.Strings.format;
66-
import static org.elasticsearch.search.SearchService.maybeWrapListenerForStackTrace;
66+
import static org.elasticsearch.search.SearchService.wrapListenerForErrorHandling;
6767
import static org.hamcrest.CoreMatchers.is;
6868
import static org.hamcrest.Matchers.not;
6969

@@ -128,7 +128,7 @@ public Type getType() {
128128
doTestCanMatch(searchRequest, sortField, true, null, false);
129129
}
130130

131-
public void testMaybeWrapListenerForStackTrace() {
131+
public void testWrapListenerForErrorHandling() {
132132
ShardId shardId = new ShardId("index", "index", 0);
133133
// Tests that the same listener has stack trace if is not wrapped or does not have stack trace if it is wrapped.
134134
AtomicBoolean isWrapped = new AtomicBoolean(false);
@@ -151,12 +151,12 @@ public void onFailure(Exception e) {
151151
e.fillInStackTrace();
152152
assertThat(e.getStackTrace().length, is(not(0)));
153153
listener.onFailure(e);
154-
listener = maybeWrapListenerForStackTrace(listener, TransportVersion.current(), "node", shardId, 123L, threadPool);
154+
listener = wrapListenerForErrorHandling(listener, TransportVersion.current(), "node", shardId, 123L, threadPool);
155155
isWrapped.set(true);
156156
listener.onFailure(e);
157157
}
158158

159-
public void testMaybeWrapListenerForStackTraceDebugLog() {
159+
public void testWrapListenerForErrorHandlingDebugLog() {
160160
final String nodeId = "node";
161161
final String index = "index";
162162
ShardId shardId = new ShardId(index, index, 0);
@@ -189,12 +189,12 @@ public void onFailure(Exception e) {
189189
}
190190
};
191191
IllegalArgumentException e = new IllegalArgumentException(exceptionMessage); // 400-level exception
192-
listener = maybeWrapListenerForStackTrace(listener, TransportVersion.current(), nodeId, shardId, taskId, threadPool);
192+
listener = wrapListenerForErrorHandling(listener, TransportVersion.current(), nodeId, shardId, taskId, threadPool);
193193
listener.onFailure(e);
194194
}
195195
}
196196

197-
public void testMaybeWrapListenerForStackTraceWarnLog() {
197+
public void testWrapListenerForErrorHandlingWarnLog() {
198198
final String nodeId = "node";
199199
final String index = "index";
200200
ShardId shardId = new ShardId(index, index, 0);
@@ -226,7 +226,7 @@ public void onFailure(Exception e) {
226226
}
227227
};
228228
IllegalStateException e = new IllegalStateException(exceptionMessage); // 500-level exception
229-
listener = maybeWrapListenerForStackTrace(listener, TransportVersion.current(), nodeId, shardId, taskId, threadPool);
229+
listener = wrapListenerForErrorHandling(listener, TransportVersion.current(), nodeId, shardId, taskId, threadPool);
230230
listener.onFailure(e);
231231
}
232232
}

test/framework/src/main/java/org/elasticsearch/search/ErrorTraceHelper.java

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import java.util.stream.Collectors;
1919

2020
import static org.elasticsearch.common.Strings.format;
21-
import static org.elasticsearch.test.ESIntegTestCase.getNodeId;
2221
import static org.elasticsearch.test.ESIntegTestCase.internalCluster;
2322

2423
/**
@@ -62,32 +61,4 @@ public static void addSeenLoggingExpectations(int numShards, MockLog mockLog, St
6261
);
6362
}
6463
}
65-
66-
/**
67-
* Adds expectations for the _absence_ of debug logging of a message. An unseen expectation is added for each
68-
* combination of node in the internal cluster and shard in the index.
69-
*
70-
* @param numShards the number of shards in the index (an expectation will be added for each shard)
71-
* @param mockLog the mock log
72-
* @param errorTriggeringIndex the name of the index that will trigger the error
73-
*/
74-
public static void addUnseenLoggingExpectations(int numShards, MockLog mockLog, String errorTriggeringIndex) {
75-
for (String nodeName : internalCluster().getNodeNames()) {
76-
for (int shard = 0; shard < numShards; shard++) {
77-
mockLog.addExpectation(
78-
new MockLog.UnseenEventExpectation(
79-
format(
80-
"\"[%s][%s][%d]: failed to execute search request\" and an exception logged",
81-
getNodeId(nodeName),
82-
errorTriggeringIndex,
83-
shard
84-
),
85-
SearchService.class.getCanonicalName(),
86-
Level.DEBUG,
87-
format("[%s][%s][%d]: failed to execute search request", getNodeId(nodeName), errorTriggeringIndex, shard)
88-
)
89-
);
90-
}
91-
}
92-
}
9364
}

0 commit comments

Comments
 (0)