HDFS-13179. TestLazyPersistReplicaRecovery#testDnRestartWithSavedReplicas fails intermittently. Contributed by Ahmed Hussein.
This commit is contained in:
parent
5abd0148eb
commit
1839c467f6
@ -3363,6 +3363,11 @@ boolean reserveLockedMemory(long bytesNeeded) {
|
|||||||
return cacheManager.reserve(bytesNeeded) > 0;
|
return cacheManager.reserve(bytesNeeded) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the count reported by
 * {@code ramDiskReplicaTracker.numReplicasNotPersisted()}.
 * Annotated {@code @VisibleForTesting}; the test in this change polls it
 * until it reaches zero before restarting the DataNode.
 *
 * @return the value of {@code numReplicasNotPersisted()} on the tracker
 */
@VisibleForTesting
|
||||||
|
public int getNonPersistentReplicas() {
|
||||||
|
return ramDiskReplicaTracker.numReplicasNotPersisted();
|
||||||
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public void setTimer(Timer newTimer) {
|
public void setTimer(Timer newTimer) {
|
||||||
this.timer = newTimer;
|
this.timer = newTimer;
|
||||||
|
@ -19,6 +19,13 @@
|
|||||||
package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
|
package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.client.BlockReportOptions;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@ -27,6 +34,7 @@
|
|||||||
|
|
||||||
import static org.apache.hadoop.fs.StorageType.DEFAULT;
|
import static org.apache.hadoop.fs.StorageType.DEFAULT;
|
||||||
import static org.apache.hadoop.fs.StorageType.RAM_DISK;
|
import static org.apache.hadoop.fs.StorageType.RAM_DISK;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
public class TestLazyPersistReplicaRecovery extends LazyPersistTestCase {
|
public class TestLazyPersistReplicaRecovery extends LazyPersistTestCase {
|
||||||
@Test
|
@Test
|
||||||
@ -34,6 +42,10 @@ public void testDnRestartWithSavedReplicas()
|
|||||||
throws IOException, InterruptedException, TimeoutException {
|
throws IOException, InterruptedException, TimeoutException {
|
||||||
|
|
||||||
getClusterBuilder().build();
|
getClusterBuilder().build();
|
||||||
|
FSNamesystem fsn = cluster.getNamesystem();
|
||||||
|
final DataNode dn = cluster.getDataNodes().get(0);
|
||||||
|
DatanodeDescriptor dnd =
|
||||||
|
NameNodeAdapter.getDatanode(fsn, dn.getDatanodeId());
|
||||||
final String METHOD_NAME = GenericTestUtils.getMethodName();
|
final String METHOD_NAME = GenericTestUtils.getMethodName();
|
||||||
Path path1 = new Path("/" + METHOD_NAME + ".01.dat");
|
Path path1 = new Path("/" + METHOD_NAME + ".01.dat");
|
||||||
|
|
||||||
@ -42,14 +54,17 @@ public void testDnRestartWithSavedReplicas()
|
|||||||
|
|
||||||
// Sleep for a short time to allow the lazy writer thread to do its job.
|
// Wait (polling, rather than a fixed sleep) for the lazy writer thread to do its job.
|
||||||
// However the block replica should not be evicted from RAM_DISK yet.
|
// However the block replica should not be evicted from RAM_DISK yet.
|
||||||
Thread.sleep(3 * LAZY_WRITER_INTERVAL_SEC * 1000);
|
FsDatasetImpl fsDImpl = (FsDatasetImpl) DataNodeTestUtils.getFSDataset(dn);
|
||||||
|
GenericTestUtils
|
||||||
|
.waitFor(() -> fsDImpl.getNonPersistentReplicas() == 0, 10,
|
||||||
|
3 * LAZY_WRITER_INTERVAL_SEC * 1000);
|
||||||
ensureFileReplicasOnStorageType(path1, RAM_DISK);
|
ensureFileReplicasOnStorageType(path1, RAM_DISK);
|
||||||
|
|
||||||
LOG.info("Restarting the DataNode");
|
LOG.info("Restarting the DataNode");
|
||||||
cluster.restartDataNode(0, true);
|
assertTrue("DN did not restart properly",
|
||||||
cluster.waitActive();
|
cluster.restartDataNode(0, true));
|
||||||
triggerBlockReport();
|
// wait for blockreport
|
||||||
|
waitForBlockReport(dn, dnd);
|
||||||
// Ensure that the replica is now on persistent storage.
|
// Ensure that the replica is now on persistent storage.
|
||||||
ensureFileReplicasOnStorageType(path1, DEFAULT);
|
ensureFileReplicasOnStorageType(path1, DEFAULT);
|
||||||
}
|
}
|
||||||
@ -73,4 +88,20 @@ public void testDnRestartWithUnsavedReplicas()
|
|||||||
// Ensure that the replica is still on transient storage.
|
// Ensure that the replica is still on transient storage.
|
||||||
ensureFileReplicasOnStorageType(path1, RAM_DISK);
|
ensureFileReplicasOnStorageType(path1, RAM_DISK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Triggers a non-incremental block report on {@code dn} and waits until the
 * block report count of the first storage of {@code dnd} changes.
 *
 * @param dn the DataNode on which the block report is triggered
 * @param dnd the descriptor whose first {@code DatanodeStorageInfo} is polled
 * @return {@code true} if the block report count changed before the wait
 *         timed out; {@code false} if a {@code TimeoutException} was caught
 * @throws IOException declared by the method; may come from
 *         {@code triggerBlockReport} (TODO confirm)
 * @throws InterruptedException declared by the method; presumably thrown
 *         if the wait is interrupted (TODO confirm)
 */
private boolean waitForBlockReport(final DataNode dn,
|
||||||
|
final DatanodeDescriptor dnd) throws IOException, InterruptedException {
|
||||||
|
final DatanodeStorageInfo storage = dnd.getStorageInfos()[0];
|
||||||
|
// Snapshot the count before triggering, so any change can be detected.
final long lastCount = storage.getBlockReportCount();
|
||||||
|
dn.triggerBlockReport(
|
||||||
|
new BlockReportOptions.Factory().setIncremental(false).build());
|
||||||
|
try {
|
||||||
|
// NOTE(review): 10 and 10000 are presumably the poll interval and the
// overall timeout in milliseconds - confirm against GenericTestUtils.
GenericTestUtils
|
||||||
|
.waitFor(() -> lastCount != storage.getBlockReportCount(), 10, 10000);
|
||||||
|
} catch (TimeoutException te) {
|
||||||
|
// A timeout is reported through the return value rather than rethrown.
LOG.error("Timeout waiting for block report for {}", dnd);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user