HDFS-16061. DFTestUtil.waitReplication can produce false positives (#3095). Contributed by Ahmed Hussein.
Reviewed-by: Jim Brennan <jbrennan@apache.org> Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
This commit is contained in:
parent
8d241482bd
commit
92ade1f6f9
@ -799,41 +799,48 @@ public String[] getFileNames(String topDir) {
|
||||
|
||||
/**
|
||||
* Wait for the given file to reach the given replication factor.
|
||||
* @throws TimeoutException if we fail to sufficiently replicate the file
|
||||
*
|
||||
* @param fs the defined filesystem.
|
||||
* @param fileName being written.
|
||||
* @param replFactor desired replication
|
||||
* @throws IOException getting block locations
|
||||
* @throws InterruptedException during sleep
|
||||
* @throws TimeoutException if 40 seconds passed before reaching the desired
|
||||
* replication.
|
||||
*/
|
||||
public static void waitReplication(FileSystem fs, Path fileName, short replFactor)
|
||||
public static void waitReplication(FileSystem fs, Path fileName,
|
||||
short replFactor)
|
||||
throws IOException, InterruptedException, TimeoutException {
|
||||
boolean correctReplFactor;
|
||||
final int ATTEMPTS = 40;
|
||||
int count = 0;
|
||||
|
||||
int attempt = 0;
|
||||
do {
|
||||
correctReplFactor = true;
|
||||
if (attempt++ > 0) {
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
BlockLocation locs[] = fs.getFileBlockLocations(
|
||||
fs.getFileStatus(fileName), 0, Long.MAX_VALUE);
|
||||
count++;
|
||||
for (int j = 0; j < locs.length; j++) {
|
||||
String[] hostnames = locs[j].getNames();
|
||||
for (int currLoc = 0; currLoc < locs.length; currLoc++) {
|
||||
String[] hostnames = locs[currLoc].getNames();
|
||||
if (hostnames.length != replFactor) {
|
||||
LOG.info(
|
||||
"Block {} of file {} has replication factor {} "
|
||||
+ "(desired {}); locations: {}",
|
||||
currLoc, fileName, hostnames.length, replFactor,
|
||||
Joiner.on(' ').join(hostnames));
|
||||
correctReplFactor = false;
|
||||
System.out.println("Block " + j + " of file " + fileName
|
||||
+ " has replication factor " + hostnames.length
|
||||
+ " (desired " + replFactor + "); locations "
|
||||
+ Joiner.on(' ').join(hostnames));
|
||||
Thread.sleep(1000);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (correctReplFactor) {
|
||||
System.out.println("All blocks of file " + fileName
|
||||
+ " verified to have replication factor " + replFactor);
|
||||
}
|
||||
} while (!correctReplFactor && count < ATTEMPTS);
|
||||
} while (!correctReplFactor && attempt < 40);
|
||||
|
||||
if (count == ATTEMPTS) {
|
||||
throw new TimeoutException("Timed out waiting for " + fileName +
|
||||
" to reach " + replFactor + " replicas");
|
||||
if (!correctReplFactor) {
|
||||
throw new TimeoutException("Timed out waiting for file ["
|
||||
+ fileName + "] to reach [" + replFactor + "] replicas");
|
||||
}
|
||||
|
||||
LOG.info("All blocks of file {} verified to have replication factor {}",
|
||||
fileName, replFactor);
|
||||
}
|
||||
|
||||
/** delete directory and everything underneath it.*/
|
||||
|
@ -20,13 +20,17 @@
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.Timeout;
|
||||
|
||||
/**
|
||||
* The Balancer ensures that it disperses RPCs to the NameNode
|
||||
* in order to avoid NN's RPC queue saturation.
|
||||
*/
|
||||
public class TestBalancerRPCDelay {
|
||||
@Rule
|
||||
public Timeout globalTimeout = Timeout.seconds(100);
|
||||
|
||||
private TestBalancer testBalancer;
|
||||
|
||||
@ -43,12 +47,12 @@ public void teardown() throws Exception {
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=100000)
|
||||
@Test
|
||||
public void testBalancerRPCDelayQps3() throws Exception {
|
||||
testBalancer.testBalancerRPCDelay(3);
|
||||
}
|
||||
|
||||
@Test(timeout=100000)
|
||||
@Test
|
||||
public void testBalancerRPCDelayQpsDefault() throws Exception {
|
||||
testBalancer.testBalancerRPCDelay(
|
||||
DFSConfigKeys.DFS_NAMENODE_GETBLOCKS_MAX_QPS_DEFAULT);
|
||||
|
Loading…
Reference in New Issue
Block a user