From 4ca669ad420227cba369346cb0230d7bb12f2f11 Mon Sep 17 00:00:00 2001 From: Artem Prigoda Date: Mon, 24 Feb 2025 08:22:37 +0100 Subject: [PATCH] [test] Fix `RetrySearchIntegTests` (#122919) Don't simultaneously restart multiple nodes in a cluster. It causes data races when multiple primaries are trying to mark the `[[.snapshot-blob-cache][0]]` shard as stale. ``` org.elasticsearch.cluster.action.shard.ShardStateAction$NoLongerPrimaryShardException: primary term [2] did not match current primary term [4] at org.elasticsearch.cluster.action.shard.ShardStateAction$ShardFailedClusterStateTaskExecutor.execute(ShardStateAction.java:355) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService.innerExecuteTasks(MasterService.java:1075) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService.executeTasks(MasterService.java:1038) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService.executeAndPublishBatch(MasterService.java:245) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService$BatchingTaskQueue$Processor.lambda$run$2(MasterService.java:1691) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.action.ActionListener.run(ActionListener.java:452) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService$BatchingTaskQueue$Processor.run(MasterService.java:1688) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService$5.lambda$doRun$0(MasterService.java:1283) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.action.ActionListener.run(ActionListener.java:452) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService$5.doRun(MasterService.java:1262) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:1044) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:27) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) ~[?:?] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) ~[?:?] at java.lang.Thread.run(Thread.java:1575) ~[?:?] ``` Resolve #118374 Resolve #120442 --- muted-tests.yml | 3 --- .../xpack/searchablesnapshots/RetrySearchIntegTests.java | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 96977744a89ea..d12b04183797a 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -122,9 +122,6 @@ tests: - class: org.elasticsearch.datastreams.DataStreamsClientYamlTestSuiteIT method: test {p0=data_stream/120_data_streams_stats/Multiple data stream} issue: https://github.com/elastic/elasticsearch/issues/118217 -- class: org.elasticsearch.xpack.searchablesnapshots.RetrySearchIntegTests - method: testSearcherId - issue: https://github.com/elastic/elasticsearch/issues/118374 - class: org.elasticsearch.xpack.ccr.rest.ShardChangesRestIT method: testShardChangesNoOperation issue: https://github.com/elastic/elasticsearch/issues/118800 diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/RetrySearchIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/RetrySearchIntegTests.java index c50fe50db8b40..c9a1a82b34118 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/RetrySearchIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/RetrySearchIntegTests.java @@ -90,6 +90,7 @@ public void testSearcherId() throws Exception { for (String allocatedNode : allocatedNodes) { if (randomBoolean()) { internalCluster().restartNode(allocatedNode); + ensureGreen(indexName); } } ensureGreen(indexName); @@ -151,6 +152,7 @@ public void testRetryPointInTime() throws Exception { final Set allocatedNodes = internalCluster().nodesInclude(indexName); for (String allocatedNode : allocatedNodes) { internalCluster().restartNode(allocatedNode); + ensureGreen(indexName); } ensureGreen(indexName); assertNoFailuresAndResponse(