HDFS-9329. TestBootstrapStandby#testRateThrottling is flaky because fsimage size is smaller than IO buffer size. Contributed by Zhe Zhang.

Change-Id: I09896c46e9ee0718b67c64fac5acfb3f7decf0b9
This commit is contained in:
Zhe Zhang 2015-11-02 10:03:39 -08:00
parent 04d97f8abb
commit 259bea3b48
2 changed files with 62 additions and 20 deletions

View File

@ -2204,6 +2204,9 @@ Release 2.8.0 - UNRELEASED
HDFS-9343. Empty caller context considered invalid. (Mingliang Liu via
Arpit Agarwal)
HDFS-9329. TestBootstrapStandby#testRateThrottling is flaky because fsimage
size is smaller than IO buffer size. (zhz)
Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -25,13 +25,16 @@
import java.io.IOException;
import java.net.URI;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import com.google.common.base.Supplier;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtilClient;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature;
@ -109,12 +112,16 @@ public void testSuccessfulBaseCase() throws Exception {
"storage directory does not exist or is not accessible", ioe);
}
int expectedCheckpointTxId = (int)NameNodeAdapter.getNamesystem(nn0)
.getFSImage().getMostRecentCheckpointTxId();
int rc = BootstrapStandby.run(new String[] { "-nonInteractive" },
cluster.getConfiguration(index));
assertEquals(0, rc);
// Should have copied over the namespace from the active
FSImageTestUtil.assertNNHasCheckpoints(cluster, index, ImmutableList.of(0));
FSImageTestUtil.assertNNHasCheckpoints(cluster, index,
ImmutableList.of(expectedCheckpointTxId));
}
// We should now be able to start the standbys successfully.
@ -221,7 +228,7 @@ public void testOtherNodeNotActive() throws Exception {
* {@link DFSConfigKeys#DFS_IMAGE_TRANSFER_BOOTSTRAP_STANDBY_RATE_KEY}
* created by HDFS-8808.
*/
@Test
@Test(timeout=30000)
public void testRateThrottling() throws Exception {
cluster.getConfiguration(0).setLong(
DFSConfigKeys.DFS_IMAGE_TRANSFER_RATE_KEY, 1);
@ -229,23 +236,46 @@ public void testRateThrottling() throws Exception {
cluster.waitActive();
nn0 = cluster.getNameNode(0);
cluster.transitionToActive(0);
// Each edit has at least 1 byte. So the lowRate definitely should cause
// a timeout, if enforced. If lowRate is not enforced, any reasonable test
// machine should at least download an image with 5 edits in 5 seconds.
for (int i = 0; i < 5; i++) {
// Any reasonable test machine should be able to transfer 1 byte per ms
// (which is ~1 KB/s)
final int minXferRatePerMS = 1;
int imageXferBufferSize = DFSUtilClient.getIoFileBufferSize(
new Configuration());
File imageFile = null;
int dirIdx = 0;
while (imageFile == null || imageFile.length() < imageXferBufferSize) {
for (int i = 0; i < 5; i++) {
cluster.getFileSystem(0).mkdirs(new Path("/foo" + dirIdx++));
}
nn0.getRpcServer().rollEditLog();
NameNodeAdapter.enterSafeMode(nn0, false);
NameNodeAdapter.saveNamespace(nn0);
NameNodeAdapter.leaveSafeMode(nn0);
imageFile = FSImageTestUtil.findLatestImageFile(FSImageTestUtil
.getFSImage(nn0).getStorage().getStorageDir(0));
}
final int timeOut = (int)(imageFile.length() / minXferRatePerMS) + 1;
// A very low DFS_IMAGE_TRANSFER_RATE_KEY value won't affect bootstrapping
final AtomicBoolean bootStrapped = new AtomicBoolean(false);
new Thread(
new Runnable() {
@Override
public void run() {
try {
testSuccessfulBaseCase();
bootStrapped.set(true);
} catch (Exception e) {
fail(e.getMessage());
}
}
}
).start();
GenericTestUtils.waitFor(new Supplier<Boolean>() {
public Boolean get() {
try {
testSuccessfulBaseCase();
return true;
} catch (Exception e) {
return false;
}
return bootStrapped.get();
}
}, 500, 5000);
}, 50, timeOut);
shutdownCluster();
setupCluster();
@ -257,17 +287,26 @@ public Boolean get() {
cluster.transitionToActive(0);
// A very low DFS_IMAGE_TRANSFER_BOOTSTRAP_STANDBY_RATE_KEY value should
// cause a timeout
bootStrapped.set(false);
new Thread(
new Runnable() {
@Override
public void run() {
try {
testSuccessfulBaseCase();
bootStrapped.set(true);
} catch (Exception e) {
LOG.info(e.getMessage());
}
}
}
).start();
try {
GenericTestUtils.waitFor(new Supplier<Boolean>() {
public Boolean get() {
try {
testSuccessfulBaseCase();
return true;
} catch (Exception e) {
return false;
}
return bootStrapped.get();
}
}, 500, 5000);
}, 50, timeOut);
fail("Did not timeout");
} catch (TimeoutException e) {
LOG.info("Encountered expected timeout.");