HDFS-17704. Fix TestDecommission and TestDecommissionWithBackoffMonitor often timing out. #7266

Merged 1 commit on Jan 24, 2025
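
In short: the patch swaps the assorted per-method JUnit timeouts below (20 to 360 seconds) for a single class-level 600-second Timeout rule in AdminStatesBaseTest, giving TestDecommission and its subclass TestDecommissionWithBackoffMonitor a uniform, more generous budget per test.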
AdminStatesBaseTest.java
@@ -28,10 +28,12 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.concurrent.TimeUnit;

 import org.apache.hadoop.util.Lists;
 import org.junit.Rule;
 import org.junit.rules.TemporaryFolder;
+import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -69,6 +71,8 @@ public class AdminStatesBaseTest {

   @Rule
   public TemporaryFolder baseDir = new TemporaryFolder();
+  @Rule
+  public Timeout timeout = new Timeout(600, TimeUnit.SECONDS);

   private HostsFileWriter hostsFileWriter;
   private Configuration conf;
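For background, a minimal sketch of the JUnit 4 mechanism this change relies on (the class and method names here are hypothetical stand-ins, not part of the patch): a public Timeout field annotated with @Rule applies to every @Test method in the declaring class and in all of its subclasses, so the one declaration in AdminStatesBaseTest covers each inheriting test class.

```java
import static org.junit.Assert.assertEquals;

import java.util.concurrent.TimeUnit;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;

// Hypothetical stand-in for AdminStatesBaseTest.
public class BaseTimeoutExample {
  // One budget for every test in this class and its subclasses: any test
  // still running after 600 seconds fails with a TestTimedOutException.
  @Rule
  public Timeout globalTimeout = new Timeout(600, TimeUnit.SECONDS);
  // The factory form Timeout.seconds(600) is equivalent.

  @Test
  public void finishesWellWithinBudget() {
    assertEquals(2, 1 + 1);  // trivial work, far under the limit
  }
}
```

A side effect worth knowing: unlike @Test(timeout=...), which bounds only the test method itself, a Timeout rule also covers @Before and @After methods, so the 600 seconds is a budget for the whole fixture run of each test.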
TestDecommission.java
@@ -188,7 +188,7 @@ private void verifyStats(NameNode namenode, FSNamesystem fsn,
   /**
    * Tests decommission for non federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommission() throws IOException {
     testDecommission(1, 6);
   }
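The rest of this file repeats the same mechanical edit: each @Test(timeout=...) becomes a plain @Test. Had the per-method values been kept, JUnit 4 would enforce both the annotation's limit and the inherited rule's, with the shorter one firing first; dropping them leaves the single 600-second budget in charge. A hypothetical subclass continuing the sketch above:

```java
import org.junit.Test;

// Hypothetical subclass in the spirit of TestDecommission, extending
// the BaseTimeoutExample sketch shown earlier.
public class DecommissionTimeoutExample extends BaseTimeoutExample {
  @Test  // bounded only by the inherited 600-second rule
  public void slowButWithinBudget() throws InterruptedException {
    Thread.sleep(100);  // stand-in for slow decommission work
  }
}
```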
@@ -198,7 +198,7 @@ public void testDecommission() throws IOException {
    * to other datanodes and satisfy the replication factor. Make sure the
    * datanode won't get stuck in decommissioning state.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testDecommission2() throws IOException {
     LOG.info("Starting test testDecommission");
     int numNamenodes = 1;
@@ -247,7 +247,7 @@ public void testDecommission2() throws IOException {
   /**
    * Test decommission for federeated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionFederation() throws IOException {
     testDecommission(2, 2);
   }
@@ -262,7 +262,7 @@ public void testDecommissionFederation() throws IOException {
    * That creates inconsistent state and prevent SBN from finishing
    * decommission.
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionOnStandby() throws Exception {
     getConf().setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
     getConf().setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
@@ -435,7 +435,7 @@ private void testDecommission(int numNamenodes, int numDatanodes)
   /**
    * Test that over-replicated blocks are deleted on recommission.
    */
-  @Test(timeout=120000)
+  @Test
   public void testRecommission() throws Exception {
     final int numDatanodes = 6;
     try {
@@ -516,7 +516,7 @@ public Boolean get() {
    * Tests cluster storage statistics during decommissioning for non
    * federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testClusterStats() throws Exception {
     testClusterStats(1);
   }
@@ -525,7 +525,7 @@ public void testClusterStats() throws Exception {
    * Tests cluster storage statistics during decommissioning for
    * federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testClusterStatsFederation() throws Exception {
     testClusterStats(3);
   }
@@ -575,7 +575,7 @@ private DataNode getDataNode(DatanodeInfo decomInfo) {
    * in the include file are allowed to connect to the namenode in a non
    * federated cluster.
    */
-  @Test(timeout=360000)
+  @Test
   public void testHostsFile() throws IOException, InterruptedException {
     // Test for a single namenode cluster
     testHostsFile(1);
@@ -586,7 +586,7 @@ public void testHostsFile() throws IOException, InterruptedException {
    * in the include file are allowed to connect to the namenode in a
    * federated cluster.
    */
-  @Test(timeout=360000)
+  @Test
   public void testHostsFileFederation()
       throws IOException, InterruptedException {
     // Test for 3 namenode federated cluster
@@ -624,7 +624,7 @@ public void testHostsFile(int numNameNodes) throws IOException,
     }
   }

-  @Test(timeout=120000)
+  @Test
   public void testDecommissionWithOpenfile()
       throws IOException, InterruptedException {
     LOG.info("Starting test testDecommissionWithOpenfile");
@@ -676,7 +676,7 @@ public void testDecommissionWithOpenfile()
     fdos.close();
   }

-  @Test(timeout = 20000)
+  @Test
   public void testDecommissionWithUnknownBlock() throws IOException {
     startCluster(1, 3);

@@ -795,7 +795,7 @@ public Boolean get() {
     }
   }

-  @Test(timeout=180000)
+  @Test
   public void testDecommissionWithOpenfileReporting()
       throws Exception {
     LOG.info("Starting test testDecommissionWithOpenfileReporting");
@@ -901,7 +901,7 @@ public void run() {
    * 2. close file with decommissioning
    * @throws Exception
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionWithCloseFileAndListOpenFiles()
       throws Exception {
     LOG.info("Starting test testDecommissionWithCloseFileAndListOpenFiles");
@@ -958,7 +958,7 @@ public void testDecommissionWithCloseFileAndListOpenFiles()
     fileSys.delete(file, false);
   }

-  @Test(timeout = 360000)
+  @Test
   public void testDecommissionWithOpenFileAndBlockRecovery()
       throws IOException, InterruptedException {
     startCluster(1, 6);
@@ -1005,7 +1005,7 @@ public void testDecommissionWithOpenFileAndBlockRecovery()
     assertEquals(dfs.getFileStatus(file).getLen(), writtenBytes);
   }

-  @Test(timeout=120000)
+  @Test
   public void testCloseWhileDecommission() throws IOException,
       ExecutionException, InterruptedException {
     LOG.info("Starting test testCloseWhileDecommission");
@@ -1064,7 +1064,7 @@ public void testCloseWhileDecommission() throws IOException,
    * to the IBR, all three nodes dn1/dn2/dn3 enter Decommissioning and then the
    * DN reports the IBR.
    */
-  @Test(timeout=120000)
+  @Test
   public void testAllocAndIBRWhileDecommission() throws IOException {
     LOG.info("Starting test testAllocAndIBRWhileDecommission");
     getConf().setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
@@ -1149,7 +1149,7 @@ public void testAllocAndIBRWhileDecommission() throws IOException {
   /**
    * Tests restart of namenode while datanode hosts are added to exclude file
    **/
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionWithNamenodeRestart()
       throws IOException, InterruptedException {
     LOG.info("Starting test testDecommissionWithNamenodeRestart");
@@ -1201,7 +1201,7 @@ public void testDecommissionWithNamenodeRestart()
   /**
    * Tests dead node count after restart of namenode
    **/
-  @Test(timeout=360000)
+  @Test
   public void testDeadNodeCountAfterNamenodeRestart()throws Exception {
     LOG.info("Starting test testDeadNodeCountAfterNamenodeRestart");
     int numNamenodes = 1;
@@ -1248,7 +1248,7 @@ public void testDeadNodeCountAfterNamenodeRestart()throws Exception {
    * valid DNS hostname for the DataNode. See HDFS-5237 for background.
    */
   @Ignore
-  @Test(timeout=360000)
+  @Test
   public void testIncludeByRegistrationName() throws Exception {
     // Any IPv4 address starting with 127 functions as a "loopback" address
     // which is connected to the current host. So by choosing 127.0.0.100
@@ -1314,7 +1314,7 @@ public Boolean get() {
     }, 500, 5000);
   }

-  @Test(timeout=120000)
+  @Test
   public void testBlocksPerInterval() throws Exception {
     GenericTestUtils.setLogLevel(
         LoggerFactory.getLogger(DatanodeAdminManager.class), Level.TRACE);
@@ -1369,7 +1369,7 @@ private void doDecomCheck(DatanodeManager datanodeManager,
   /**
    * Test DatanodeAdminManager#monitor can swallow any exceptions by default.
    */
-  @Test(timeout=120000)
+  @Test
   public void testPendingNodeButDecommissioned() throws Exception {
     // Only allow one node to be decom'd at a time
     getConf().setInt(
@@ -1416,7 +1416,7 @@ public void testPendingNodeButDecommissioned() throws Exception {
     }
   }

-  @Test(timeout=120000)
+  @Test
   public void testPendingNodes() throws Exception {
     GenericTestUtils.setLogLevel(
         LoggerFactory.getLogger(DatanodeAdminManager.class), Level.TRACE);
@@ -1639,7 +1639,7 @@ public void testUsedCapacity() throws Exception {
   /**
    * Verify if multiple DataNodes can be decommission at the same time.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testMultipleNodesDecommission() throws Exception {
     startCluster(1, 5);
     final Path file = new Path("/testMultipleNodesDecommission.dat");
@@ -1685,7 +1685,7 @@ public Boolean get() {
    * Force the tracked nodes set to be filled with nodes lost while decommissioning,
    * then decommission healthy nodes & validate they are decommissioned eventually.
    */
-  @Test(timeout = 120000)
+  @Test
   public void testRequeueUnhealthyDecommissioningNodes() throws Exception {
     // Create a MiniDFSCluster with 3 live datanode in AdminState=NORMAL and
     // 2 dead datanodes in AdminState=DECOMMISSION_INPROGRESS and a file
@@ -1911,7 +1911,8 @@ private void createClusterWithDeadNodesDecommissionInProgress(final int numLiveN
      under-replicated block can be replicated to sufficient datanodes & the decommissioning
      node can be decommissioned.
    */
-  @Test(timeout = 60000)
+  @SuppressWarnings("checkstyle:methodlength")
+  @Test
   public void testDeleteCorruptReplicaForUnderReplicatedBlock() throws Exception {
     // Constants
     final Path file = new Path("/test-file");
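
Note the one extra added line in this final hunk: @SuppressWarnings("checkstyle:methodlength") silences Checkstyle's method-length check for this long test method (assuming the build routes @SuppressWarnings into Checkstyle via its SuppressWarningsFilter); it is incidental to the timeout change itself.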