From e6c1f6ded13df378848f1ee05f77e24ec8a3a1ac Mon Sep 17 00:00:00 2001 From: Artem Prigoda Date: Mon, 24 Feb 2025 09:58:32 +0100 Subject: [PATCH] [8.x] [test] Fix `RetrySearchIntegTests` (#122919) (#123237) Backports #122919 to 8.x > Don't simultaneously restart multiple nodes in a cluster. It causes data races when multiple primaries are trying to mark the `[[.snapshot-blob-cache][0]]` shard as stale. ``` org.elasticsearch.cluster.action.shard.ShardStateAction$NoLongerPrimaryShardException: primary term [2] did not match current primary term [4] at org.elasticsearch.cluster.action.shard.ShardStateAction$ShardFailedClusterStateTaskExecutor.execute(ShardStateAction.java:355) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService.innerExecuteTasks(MasterService.java:1075) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService.executeTasks(MasterService.java:1038) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService.executeAndPublishBatch(MasterService.java:245) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService$BatchingTaskQueue$Processor.lambda$run$2(MasterService.java:1691) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.action.ActionListener.run(ActionListener.java:452) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService$BatchingTaskQueue$Processor.run(MasterService.java:1688) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService$5.lambda$doRun$0(MasterService.java:1283) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.action.ActionListener.run(ActionListener.java:452) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.cluster.service.MasterService$5.doRun(MasterService.java:1262) 
~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:1044) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:27) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) ~[?:?] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) ~[?:?] at java.lang.Thread.run(Thread.java:1575) ~[?:?] ``` Resolve #118374 Resolve #120442 --- muted-tests.yml | 6 ------ .../xpack/searchablesnapshots/RetrySearchIntegTests.java | 2 ++ 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 981c2e363457e..4b1871806bc71 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -355,9 +355,6 @@ tests: - class: org.elasticsearch.action.search.SearchQueryThenFetchAsyncActionTests method: testBottomFieldSort issue: https://github.com/elastic/elasticsearch/issues/118214 -- class: org.elasticsearch.xpack.searchablesnapshots.RetrySearchIntegTests - method: testRetryPointInTime - issue: https://github.com/elastic/elasticsearch/issues/120442 - class: org.elasticsearch.xpack.inference.DefaultEndPointsIT method: testMultipleInferencesTriggeringDownloadAndDeploy issue: https://github.com/elastic/elasticsearch/issues/117208 @@ -429,9 +426,6 @@ tests: - class: org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT method: test {yaml=reference/cat/allocation/cat-allocation-example} issue: https://github.com/elastic/elasticsearch/issues/121976 -- class: org.elasticsearch.xpack.searchablesnapshots.RetrySearchIntegTests - method: testSearcherId - issue: https://github.com/elastic/elasticsearch/issues/118374 - class: org.elasticsearch.xpack.security.authc.ldap.GroupMappingIT issue: https://github.com/elastic/elasticsearch/issues/121291 - class: 
org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/RetrySearchIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/RetrySearchIntegTests.java index c50fe50db8b40..c9a1a82b34118 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/RetrySearchIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/RetrySearchIntegTests.java @@ -90,6 +90,7 @@ public void testSearcherId() throws Exception { for (String allocatedNode : allocatedNodes) { if (randomBoolean()) { internalCluster().restartNode(allocatedNode); + ensureGreen(indexName); } } ensureGreen(indexName); @@ -151,6 +152,7 @@ public void testRetryPointInTime() throws Exception { final Set<String> allocatedNodes = internalCluster().nodesInclude(indexName); for (String allocatedNode : allocatedNodes) { internalCluster().restartNode(allocatedNode); + ensureGreen(indexName); } ensureGreen(indexName); assertNoFailuresAndResponse(