HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails sporadically. Contributed by Chris Nauroth.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1469839 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
44bf8525a5
commit
16cc4a6e86
@ -2560,6 +2560,9 @@ Release 0.23.8 - UNRELEASED
|
||||
|
||||
HDFS-4477. Secondary namenode may retain old tokens (daryn via kihwal)
|
||||
|
||||
HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails
|
||||
sporadically (Chris Nauroth via kihwal)
|
||||
|
||||
Release 0.23.7 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -1286,7 +1286,10 @@ protected void checkDiskError(Exception e ) throws IOException {
|
||||
LOG.warn("checkDiskError: exception: ", e);
|
||||
if (e instanceof SocketException || e instanceof SocketTimeoutException
|
||||
|| e instanceof ClosedByInterruptException
|
||||
|| e.getMessage().startsWith("Broken pipe")) {
|
||||
|| e.getMessage().startsWith("An established connection was aborted")
|
||||
|| e.getMessage().startsWith("Broken pipe")
|
||||
|| e.getMessage().startsWith("Connection reset")
|
||||
|| e.getMessage().contains("java.nio.channels.SocketChannel")) {
|
||||
LOG.info("Not checking disk as checkDiskError was called on a network" +
|
||||
" related exception");
|
||||
return;
|
||||
|
@ -422,6 +422,11 @@ public void testPipelineRecoveryStress() throws Exception {
|
||||
// Disable permissions so that another user can recover the lease.
|
||||
harness.conf.setBoolean(
|
||||
DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
|
||||
// This test triggers rapid NN failovers. The client retry policy uses an
|
||||
// exponential backoff. This can quickly lead to long sleep times and even
|
||||
// time out the whole test. Cap the sleep time at 1s to prevent this.
|
||||
harness.conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY,
|
||||
1000);
|
||||
|
||||
final MiniDFSCluster cluster = harness.startCluster();
|
||||
try {
|
||||
@ -537,11 +542,10 @@ public FileSystem run() throws Exception {
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to cover the lease on the given file for up to 30
|
||||
* seconds.
|
||||
* Try to recover the lease on the given file for up to 60 seconds.
|
||||
* @param fsOtherUser the filesystem to use for the recoverLease call
|
||||
* @param testPath the path on which to run lease recovery
|
||||
* @throws TimeoutException if lease recover does not succeed within 30
|
||||
* @throws TimeoutException if lease recovery does not succeed within 60
|
||||
* seconds
|
||||
* @throws InterruptedException if the thread is interrupted
|
||||
*/
|
||||
@ -564,7 +568,7 @@ public Boolean get() {
|
||||
}
|
||||
return success;
|
||||
}
|
||||
}, 1000, 30000);
|
||||
}, 1000, 60000);
|
||||
} catch (TimeoutException e) {
|
||||
throw new TimeoutException("Timed out recovering lease for " +
|
||||
testPath);
|
||||
|
Loading…
Reference in New Issue
Block a user