HDFS-16146. All three replicas are lost due to not adding a new DataNode in time. (#3247) Contributed by Shuyan Zhang.
Reviewed-by: He Xiaoqiao <hexiaoqiao@apache.org> Reviewed-by: Wei-Chiu Chuang <weichiu@apache.org>
This commit is contained in:
parent
a5811dda7b
commit
10a2526b0b
@ -1386,19 +1386,11 @@ private void addDatanode2ExistingPipeline() throws IOException {
|
||||
* Case 2: Failure in Streaming
|
||||
* - Append/Create:
|
||||
* + transfer RBW
|
||||
*
|
||||
* Case 3: Failure in Close
|
||||
* - Append/Create:
|
||||
* + no transfer, let the NameNode replicate the block.
|
||||
*/
|
||||
if (!isAppend && lastAckedSeqno < 0
|
||||
&& stage == BlockConstructionStage.PIPELINE_SETUP_CREATE) {
|
||||
//no data have been written
|
||||
return;
|
||||
} else if (stage == BlockConstructionStage.PIPELINE_CLOSE
|
||||
|| stage == BlockConstructionStage.PIPELINE_CLOSE_RECOVERY) {
|
||||
//pipeline is closing
|
||||
return;
|
||||
}
|
||||
|
||||
int tried = 0;
|
||||
|
@ -1492,6 +1492,8 @@ public void run() {
|
||||
if (lastPacketInBlock) {
|
||||
// Finalize the block and close the block file
|
||||
finalizeBlock(startTime);
|
||||
// For test only, no-op in production system.
|
||||
DataNodeFaultInjector.get().delayAckLastPacket();
|
||||
}
|
||||
|
||||
Status myStatus = pkt != null ? pkt.ackStatus : Status.SUCCESS;
|
||||
|
@ -68,6 +68,12 @@ public void delaySendingAckToUpstream(final String upstreamAddr)
|
||||
throws IOException {
|
||||
}
|
||||
|
||||
/**
|
||||
* Used as a hook to delay sending the response of the last packet.
|
||||
*/
|
||||
public void delayAckLastPacket() throws IOException {
|
||||
}
|
||||
|
||||
/**
|
||||
* Used as a hook to delay writing a packet to disk.
|
||||
*/
|
||||
|
@ -19,12 +19,14 @@
|
||||
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import java.util.function.Supplier;
|
||||
@ -39,6 +41,7 @@
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage;
|
||||
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
|
||||
@ -800,4 +803,94 @@ public void testUpdatePipeLineAfterDNReg()throws Exception {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddingDatanodeDuringClosing() throws Exception {
|
||||
DataNodeFaultInjector dnFaultInjector = new DataNodeFaultInjector() {
|
||||
@Override
|
||||
public void delayAckLastPacket() throws IOException {
|
||||
try {
|
||||
// Makes the PIPELINE_CLOSE stage longer.
|
||||
Thread.sleep(5000);
|
||||
} catch (InterruptedException ie) {
|
||||
throw new IOException("Interrupted while sleeping");
|
||||
}
|
||||
}
|
||||
};
|
||||
DataNodeFaultInjector oldDnInjector = DataNodeFaultInjector.get();
|
||||
DataNodeFaultInjector.set(dnFaultInjector);
|
||||
|
||||
Configuration conf = new HdfsConfiguration();
|
||||
MiniDFSCluster cluster = null;
|
||||
try {
|
||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
|
||||
cluster.waitActive();
|
||||
FileSystem fileSys = cluster.getFileSystem();
|
||||
|
||||
Path file = new Path("/testAddingDatanodeDuringClosing");
|
||||
FSDataOutputStream out = fileSys.create(file);
|
||||
byte[] buffer = new byte[128 * 1024];
|
||||
out.write(buffer);
|
||||
// Wait for the pipeline to be built successfully.
|
||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
if (((DFSOutputStream) out.getWrappedStream()).getStreamer()
|
||||
.getNodes() != null) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}, 100, 3000);
|
||||
|
||||
// Get three datanodes on the pipeline.
|
||||
DatanodeInfo[] pipeline =
|
||||
((DFSOutputStream) out.getWrappedStream()).getStreamer().getNodes();
|
||||
DataNode[] dataNodes = new DataNode[3];
|
||||
int i = 0;
|
||||
for (DatanodeInfo info : pipeline) {
|
||||
for (DataNode dn : cluster.getDataNodes()) {
|
||||
if (dn.getDatanodeUuid().equals(info.getDatanodeUuid())) {
|
||||
dataNodes[i++] = dn;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown the first datanode. According to the default replacement
|
||||
// strategy, no datanode will be added to existing pipeline.
|
||||
dataNodes[0].shutdown();
|
||||
|
||||
// Shutdown the second datanode when the pipeline is closing.
|
||||
new Thread(() -> {
|
||||
try {
|
||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
if (((DFSOutputStream) out.getWrappedStream()).getStreamer()
|
||||
.getStage() == BlockConstructionStage.PIPELINE_CLOSE) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}, 100, 10000);
|
||||
} catch (TimeoutException | InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
dataNodes[1].shutdown();
|
||||
}).start();
|
||||
out.close();
|
||||
// Shutdown the third datanode.
|
||||
dataNodes[2].shutdown();
|
||||
// Check if we can read the file successfully.
|
||||
DFSTestUtil.readFile(fileSys, file);
|
||||
} catch (BlockMissingException e) {
|
||||
fail("The file can not be read! " + e);
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
DataNodeFaultInjector.set(oldDnInjector);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user