Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 8c78bec

Browse files
committed Jan 16, 2025 ·
HDFS-17709. [ARR] Add async responder performance metrics.
1 parent 273673c commit 8c78bec

File tree

4 files changed

+26
-1
lines changed

4 files changed

+26
-1
lines changed
 

‎hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/protocolPB/AsyncRpcProtocolPBUtil.java

+4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.hdfs.protocolPB;
2020

21+
import org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCMetrics;
2122
import org.apache.hadoop.hdfs.server.federation.router.ThreadLocalContext;
2223
import org.apache.hadoop.hdfs.server.federation.router.async.utils.ApplyFunction;
2324
import org.apache.hadoop.hdfs.server.federation.router.async.utils.AsyncUtil;
@@ -28,6 +29,7 @@
2829
import org.apache.hadoop.ipc.ProtobufRpcEngineCallback2;
2930
import org.apache.hadoop.ipc.internal.ShadedProtobufHelper;
3031
import org.apache.hadoop.thirdparty.protobuf.Message;
32+
import org.apache.hadoop.util.Time;
3133
import org.apache.hadoop.util.concurrent.AsyncGet;
3234
import org.slf4j.Logger;
3335
import org.slf4j.LoggerFactory;
@@ -87,6 +89,7 @@ public static <T, R> R asyncIpcClient(
8789
// transfer thread local context to worker threads of executor.
8890
ThreadLocalContext threadLocalContext = new ThreadLocalContext();
8991
asyncCompleteWith(responseFuture.handleAsync((result, e) -> {
92+
FederationRPCMetrics.ASYNC_RESPONDER_START_TIME.set(Time.monotonicNow());
9093
threadLocalContext.transfer();
9194
if (e != null) {
9295
throw warpCompletionException(e);
@@ -136,6 +139,7 @@ public static <T> void asyncRouterServer(ServerReq<T> req, ServerRes<T> res) {
136139
} else {
137140
callback.error(e.getCause());
138141
}
142+
FederationRPCMetrics.addAsyncResponderThreadTime();
139143
return null;
140144
});
141145
}

‎hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCMetrics.java

+19
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
3131
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
3232
import org.apache.hadoop.metrics2.lib.MutableRate;
33+
import org.apache.hadoop.util.Time;
3334

3435
/**
3536
* Implementation of the RPC metrics collector.
@@ -41,9 +42,13 @@ public class FederationRPCMetrics implements FederationRPCMBean {
4142
private final MetricsRegistry registry = new MetricsRegistry("router");
4243

4344
private RouterRpcServer rpcServer;
45+
// Per-thread Time.monotonicNow() value stamped at the top of the response
// callback in AsyncRpcProtocolPBUtil.asyncIpcClient. Starts at -1, meaning
// "not stamped on this thread yet".
public static final ThreadLocal<Long> ASYNC_RESPONDER_START_TIME = ThreadLocal.withInitial(() -> -1L);
46+
// Per-thread Time.monotonicNow() value stamped by addAsyncResponderThreadTime().
// Starts at -1, meaning "not stamped on this thread yet".
public static final ThreadLocal<Long> ASYNC_RESPONDER_END_TIME = ThreadLocal.withInitial(() -> -1L);
4447

4548
@Metric("Time for the router to process an operation internally")
4649
private MutableRate processing;
50+
// Rate of time spent by async responder threads; fed by the static
// addAsyncResponderThreadTime(). Unlike the neighbouring metrics this field is
// static and has no initializer.
// NOTE(review): presumably populated when the metrics source is registered;
// confirm it cannot be read before that happens.
@Metric("Time for the router async responder to process an operation internally")
51+
private static MutableRate asyncResponderProcessing;
4752
@Metric("Number of operations the Router processed internally")
4853
private MutableCounterLong processingOp;
4954
@Metric("Time for the Router to proxy an operation to the Namenodes")
@@ -301,6 +306,20 @@ public void addProcessingTime(long time) {
301306
processing.add(time);
302307
processingOp.incr();
303308
}
309+
310+
public static void addAsyncResponderThreadTime() {
311+
ASYNC_RESPONDER_END_TIME.set(Time.monotonicNow());
312+
long duration = getAsyncResponderProcessingTime();
313+
asyncResponderProcessing.add(duration);
314+
}
315+
316+
public static long getAsyncResponderProcessingTime() {
317+
if (ASYNC_RESPONDER_START_TIME.get() != null && ASYNC_RESPONDER_START_TIME.get() > 0 &&
318+
ASYNC_RESPONDER_END_TIME.get() != null && ASYNC_RESPONDER_END_TIME.get() > 0) {
319+
return ASYNC_RESPONDER_END_TIME.get() - ASYNC_RESPONDER_START_TIME.get();
320+
}
321+
return -1;
322+
}
304323

305324
@Override
306325
public double getProcessingAvg() {

‎hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ public void routerFailureLocked() {
293293
/**
 * Computes the difference between the current thread's PROXY_TIME and
 * START_TIME values.
 *
 * @return PROXY_TIME minus START_TIME when both values are non-null and
 *         greater than zero; -1 otherwise.
 */
private long getProcessingTime() {
294294
if (START_TIME.get() != null && START_TIME.get() > 0 &&
295295
PROXY_TIME.get() != null && PROXY_TIME.get() > 0) {
// The next two return lines are the removed (296-) and added (296+) sides of
// one diff line; the statement text is the same on both sides in this view.
296-
return PROXY_TIME.get() - START_TIME.get();
296+
return PROXY_TIME.get() - START_TIME.get();
297297
}
298298
return -1;
299299
}

‎hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ThreadLocalContext.java

+2
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
*/
1818
package org.apache.hadoop.hdfs.server.federation.router;
1919

20+
import org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCMetrics;
2021
import org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCPerformanceMonitor;
2122
import org.apache.hadoop.ipc.CallerContext;
2223
import org.apache.hadoop.ipc.Server;
24+
import org.apache.hadoop.util.Time;
2325

2426
/**
2527
* The ThreadLocalContext class is designed to capture and transfer the context of a

0 commit comments

Comments
 (0)
Please sign in to comment.