Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support real-time gets on hollow shards #122012

Merged
merged 27 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
db73b39
Support returning latest hollow generation real-time GET requests
arteam Feb 7, 2025
0bced20
Also support multi-get requests
arteam Feb 8, 2025
44fed83
Merge remote-tracking branch 'origin/main' into hollow-real-time-gets
arteam Feb 8, 2025
3237dfa
Make TransportGetFromTranslogAction and TransportShardMultiGetFomTran…
arteam Feb 9, 2025
47b5083
Revert "Support returning latest hollow generation real-time GET requ…
arteam Feb 9, 2025
f207d15
Bring back original TransportGetFromTranslogAction and TransportShard…
arteam Feb 9, 2025
75431f7
Merge remote-tracking branch 'origin/main' into hollow-real-time-gets
arteam Feb 9, 2025
40301f2
Retry real-time gets for AlreadyClosedException
arteam Feb 10, 2025
ff0c191
Correctly check index service in case index is not a different node
arteam Feb 10, 2025
634cbf4
Merge branch 'main' into hollow-real-time-gets
arteam Feb 10, 2025
e8b47d2
Remove access to multi get request items
arteam Feb 10, 2025
195f5fb
Support returning latest hollow generation real-time GET requests
arteam Feb 7, 2025
0ca0c78
Throw UnsupportedOperationExceptions for `getLastUnsafeSegmentGenerat…
arteam Feb 10, 2025
87240c5
Also support multi-get requests
arteam Feb 8, 2025
25e5e9d
Call getLastUnsafeSegmentGenerationForGets directly on Engine
arteam Feb 10, 2025
ed8c902
Revert "Make TransportGetFromTranslogAction and TransportShardMultiGe…
arteam Feb 10, 2025
07ab4b7
Merge branch 'main' into hollow-real-time-gets
arteam Feb 10, 2025
ecf8aba
Merge branch 'main' into hollow-real-time-gets
arteam Feb 11, 2025
4806d11
Retry getTranslog calls if we encountered `AlreadyClosedException` an…
arteam Feb 11, 2025
1c03540
Merge branch 'main' into hollow-real-time-gets
arteam Feb 11, 2025
b965926
Revert "Correctly check index service in case index is not a differen…
arteam Feb 11, 2025
5d85f39
Merge branch 'main' into hollow-real-time-gets
arteam Feb 11, 2025
847bf5c
Correctly check index service in case index is not a different node
arteam Feb 10, 2025
4bdcafb
Use try versions of get*Translog calls so we don't retry forever
arteam Feb 11, 2025
01b81bb
Add TODOs for ES-10826
arteam Feb 11, 2025
8d4ab8e
Revert "Correctly check index service in case index is not a differen…
arteam Feb 11, 2025
d29028c
Merge branch 'main' into hollow-real-time-gets
arteam Feb 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.AlreadyClosedException;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.ActionListener;
Expand Down Expand Up @@ -230,7 +231,10 @@ private void getFromTranslog(
final var retryingListener = listener.delegateResponse((l, e) -> {
final var cause = ExceptionsHelper.unwrapCause(e);
logger.debug("get_from_translog failed", cause);
if (cause instanceof ShardNotFoundException || cause instanceof IndexNotFoundException) {
if (cause instanceof ShardNotFoundException
|| cause instanceof IndexNotFoundException
|| cause instanceof AlreadyClosedException) {
// TODO: the AlreadyClosedException thrown during an engine reset should be fixed by ES-10826
logger.debug("retrying get_from_translog");
observer.waitForNextChange(new ClusterStateObserver.Listener() {
@Override
Expand All @@ -245,7 +249,13 @@ public void onClusterServiceClose() {

@Override
public void onTimeout(TimeValue timeout) {
l.onFailure(new ElasticsearchException("Timed out retrying get_from_translog", cause));
// TODO: the AlreadyClosedException thrown during an engine reset should be fixed by ES-10826
if (cause instanceof AlreadyClosedException) {
// Do an additional retry just in case AlreadyClosedException didn't generate a cluster update
tryGetFromTranslog(request, indexShard, node, l);
} else {
l.onFailure(new ElasticsearchException("Timed out retrying get_from_translog", cause));
}
}
});
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ protected void doExecute(Task task, Request request, ActionListener<Response> li
if (engine == null) {
throw new AlreadyClosedException("engine closed");
}
segmentGeneration = ((InternalEngine) engine).getLastUnsafeSegmentGenerationForGets();
segmentGeneration = engine.getLastUnsafeSegmentGenerationForGets();
}
return new Response(result, indexShard.getOperationPrimaryTerm(), segmentGeneration);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.AlreadyClosedException;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.ActionListener;
Expand Down Expand Up @@ -211,7 +212,10 @@ private void shardMultiGetFromTranslog(
final var retryingListener = listener.delegateResponse((l, e) -> {
final var cause = ExceptionsHelper.unwrapCause(e);
logger.debug("mget_from_translog[shard] failed", cause);
if (cause instanceof ShardNotFoundException || cause instanceof IndexNotFoundException) {
if (cause instanceof ShardNotFoundException
|| cause instanceof IndexNotFoundException
|| cause instanceof AlreadyClosedException) {
// TODO: the AlreadyClosedException thrown during an engine reset should be fixed by ES-10826
logger.debug("retrying mget_from_translog[shard]");
observer.waitForNextChange(new ClusterStateObserver.Listener() {
@Override
Expand All @@ -226,7 +230,13 @@ public void onClusterServiceClose() {

@Override
public void onTimeout(TimeValue timeout) {
l.onFailure(new ElasticsearchException("Timed out retrying mget_from_translog[shard]", cause));
// TODO: the AlreadyClosedException thrown during an engine reset should be fixed by ES-10826
if (cause instanceof AlreadyClosedException) {
// Do an additional retry just in case AlreadyClosedException didn't generate a cluster update
tryShardMultiGetFromTranslog(request, indexShard, node, l);
} else {
l.onFailure(new ElasticsearchException("Timed out retrying mget_from_translog[shard]", cause));
}
}
});
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.engine.InternalEngine;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
Expand Down Expand Up @@ -102,7 +101,7 @@ protected void doExecute(Task task, Request request, ActionListener<Response> li
if (engine == null) {
throw new AlreadyClosedException("engine closed");
}
segmentGeneration = ((InternalEngine) engine).getLastUnsafeSegmentGenerationForGets();
segmentGeneration = engine.getLastUnsafeSegmentGenerationForGets();
}
return new Response(multiGetShardResponse, indexShard.getOperationPrimaryTerm(), segmentGeneration);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2346,4 +2346,8 @@ public record FlushResult(boolean flushPerformed, long generation) {
/**
 * Rejects the request: this implementation does not support engine reset.
 * NOTE(review): presumably overridden by engine implementations that do support a reset — confirm.
 *
 * @throws IOException declared by the signature; this implementation never raises it
 * @throws UnsupportedOperationException always, with the message "does not support engine reset"
 */
public void prepareForEngineReset() throws IOException {
    final String reason = "does not support engine reset";
    throw new UnsupportedOperationException(reason);
}

/**
 * Rejects the request: this implementation cannot report the latest segment generation.
 * NOTE(review): presumably overridden by engine implementations that track this value — confirm.
 *
 * @return never returns normally in this implementation
 * @throws UnsupportedOperationException always
 */
public long getLastUnsafeSegmentGenerationForGets() {
    final String reason = "Doesn't support getting the latest segment generation";
    throw new UnsupportedOperationException(reason);
}
}