HDDS-1509. TestBlockOutputStreamWithFailures#test2DatanodesFailure fails intermittently. Contributed by Shashikant Banerjee (#805).
This commit is contained in:
parent
f0e44b3a3f
commit
83549dbbea
@ -27,6 +27,7 @@ import org.apache.hadoop.hdds.scm.client.HddsClientUtils;
|
|||||||
import org.apache.hadoop.hdds.scm.container.ContainerID;
|
import org.apache.hadoop.hdds.scm.container.ContainerID;
|
||||||
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
|
||||||
import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
|
||||||
|
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
||||||
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
|
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
|
||||||
import org.apache.hadoop.io.retry.RetryPolicies;
|
import org.apache.hadoop.io.retry.RetryPolicies;
|
||||||
import org.apache.hadoop.io.retry.RetryPolicy;
|
import org.apache.hadoop.io.retry.RetryPolicy;
|
||||||
@ -37,6 +38,7 @@ import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
|
|||||||
import org.apache.hadoop.hdds.scm.XceiverClientManager;
|
import org.apache.hadoop.hdds.scm.XceiverClientManager;
|
||||||
import org.apache.ratis.protocol.AlreadyClosedException;
|
import org.apache.ratis.protocol.AlreadyClosedException;
|
||||||
import org.apache.ratis.protocol.GroupMismatchException;
|
import org.apache.ratis.protocol.GroupMismatchException;
|
||||||
|
import org.apache.ratis.protocol.NotReplicatedException;
|
||||||
import org.apache.ratis.protocol.RaftRetryFailureException;
|
import org.apache.ratis.protocol.RaftRetryFailureException;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -259,15 +261,24 @@ public class KeyOutputStream extends OutputStream {
|
|||||||
if (!retryFailure) {
|
if (!retryFailure) {
|
||||||
closedContainerException = checkIfContainerIsClosed(t);
|
closedContainerException = checkIfContainerIsClosed(t);
|
||||||
}
|
}
|
||||||
PipelineID pipelineId = null;
|
Pipeline pipeline = streamEntry.getPipeline();
|
||||||
|
PipelineID pipelineId = pipeline.getId();
|
||||||
long totalSuccessfulFlushedData = streamEntry.getTotalAckDataLength();
|
long totalSuccessfulFlushedData = streamEntry.getTotalAckDataLength();
|
||||||
//set the correct length for the current stream
|
//set the correct length for the current stream
|
||||||
streamEntry.setCurrentPosition(totalSuccessfulFlushedData);
|
streamEntry.setCurrentPosition(totalSuccessfulFlushedData);
|
||||||
long bufferedDataLen = blockOutputStreamEntryPool.computeBufferData();
|
long bufferedDataLen = blockOutputStreamEntryPool.computeBufferData();
|
||||||
|
if (closedContainerException) {
|
||||||
LOG.debug(
|
LOG.debug(
|
||||||
"Encountered exception {}. The last committed block length is {}, "
|
"Encountered exception {}. The last committed block length is {}, "
|
||||||
+ "uncommitted data length is {} retry count {}", exception,
|
+ "uncommitted data length is {} retry count {}", exception,
|
||||||
totalSuccessfulFlushedData, bufferedDataLen, retryCount);
|
totalSuccessfulFlushedData, bufferedDataLen, retryCount);
|
||||||
|
} else {
|
||||||
|
LOG.warn(
|
||||||
|
"Encountered exception {} on the pipeline {}. "
|
||||||
|
+ "The last committed block length is {}, "
|
||||||
|
+ "uncommitted data length is {} retry count {}", exception,
|
||||||
|
pipeline, totalSuccessfulFlushedData, bufferedDataLen, retryCount);
|
||||||
|
}
|
||||||
Preconditions.checkArgument(
|
Preconditions.checkArgument(
|
||||||
bufferedDataLen <= blockOutputStreamEntryPool.getStreamBufferMaxSize());
|
bufferedDataLen <= blockOutputStreamEntryPool.getStreamBufferMaxSize());
|
||||||
Preconditions.checkArgument(
|
Preconditions.checkArgument(
|
||||||
@ -282,8 +293,8 @@ public class KeyOutputStream extends OutputStream {
|
|||||||
if (closedContainerException) {
|
if (closedContainerException) {
|
||||||
excludeList.addConatinerId(ContainerID.valueof(containerId));
|
excludeList.addConatinerId(ContainerID.valueof(containerId));
|
||||||
} else if (retryFailure || t instanceof TimeoutException
|
} else if (retryFailure || t instanceof TimeoutException
|
||||||
|| t instanceof GroupMismatchException) {
|
|| t instanceof GroupMismatchException
|
||||||
pipelineId = streamEntry.getPipeline().getId();
|
|| t instanceof NotReplicatedException) {
|
||||||
excludeList.addPipeline(pipelineId);
|
excludeList.addPipeline(pipelineId);
|
||||||
}
|
}
|
||||||
// just clean up the current stream.
|
// just clean up the current stream.
|
||||||
|
@ -36,6 +36,7 @@ import org.apache.hadoop.ozone.client.OzoneClientFactory;
|
|||||||
import org.apache.hadoop.ozone.client.io.KeyOutputStream;
|
import org.apache.hadoop.ozone.client.io.KeyOutputStream;
|
||||||
import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
|
import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
|
||||||
import org.apache.hadoop.ozone.container.ContainerTestHelper;
|
import org.apache.hadoop.ozone.container.ContainerTestHelper;
|
||||||
|
import org.apache.ratis.protocol.GroupMismatchException;
|
||||||
import org.apache.ratis.protocol.RaftRetryFailureException;
|
import org.apache.ratis.protocol.RaftRetryFailureException;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
@ -75,7 +76,8 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
*
|
*
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
@Before public void init() throws Exception {
|
@Before
|
||||||
|
public void init() throws Exception {
|
||||||
chunkSize = 100;
|
chunkSize = 100;
|
||||||
flushSize = 2 * chunkSize;
|
flushSize = 2 * chunkSize;
|
||||||
maxFlushSize = 2 * flushSize;
|
maxFlushSize = 2 * flushSize;
|
||||||
@ -110,13 +112,15 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
/**
|
/**
|
||||||
* Shutdown MiniDFSCluster.
|
* Shutdown MiniDFSCluster.
|
||||||
*/
|
*/
|
||||||
@After public void shutdown() {
|
@After
|
||||||
|
public void shutdown() {
|
||||||
if (cluster != null) {
|
if (cluster != null) {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test public void testWatchForCommitWithCloseContainerException()
|
@Test
|
||||||
|
public void testWatchForCommitWithCloseContainerException()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
XceiverClientMetrics metrics =
|
XceiverClientMetrics metrics =
|
||||||
XceiverClientManager.getXceiverClientMetrics();
|
XceiverClientManager.getXceiverClientMetrics();
|
||||||
@ -256,7 +260,8 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
validateData(keyName, dataString.concat(dataString).getBytes());
|
validateData(keyName, dataString.concat(dataString).getBytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test public void testWatchForCommitDatanodeFailure() throws Exception {
|
@Test
|
||||||
|
public void testWatchForCommitDatanodeFailure() throws Exception {
|
||||||
XceiverClientMetrics metrics =
|
XceiverClientMetrics metrics =
|
||||||
XceiverClientManager.getXceiverClientMetrics();
|
XceiverClientManager.getXceiverClientMetrics();
|
||||||
long writeChunkCount =
|
long writeChunkCount =
|
||||||
@ -388,7 +393,8 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
validateData(keyName, dataString.concat(dataString).getBytes());
|
validateData(keyName, dataString.concat(dataString).getBytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test public void test2DatanodesFailure() throws Exception {
|
@Test
|
||||||
|
public void test2DatanodesFailure() throws Exception {
|
||||||
XceiverClientMetrics metrics =
|
XceiverClientMetrics metrics =
|
||||||
XceiverClientManager.getXceiverClientMetrics();
|
XceiverClientManager.getXceiverClientMetrics();
|
||||||
long writeChunkCount =
|
long writeChunkCount =
|
||||||
@ -494,8 +500,15 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
// rewritten plus one partial chunk plus two putBlocks for flushSize
|
// rewritten plus one partial chunk plus two putBlocks for flushSize
|
||||||
// and one flush for partial chunk
|
// and one flush for partial chunk
|
||||||
key.flush();
|
key.flush();
|
||||||
Assert.assertTrue(HddsClientUtils.checkForException(blockOutputStream
|
|
||||||
.getIoException()) instanceof RaftRetryFailureException);
|
// Since 2 datanodes went down, if the pipeline gets destroyed quickly,
|
||||||
|
// it will hit GroupMismatchException; otherwise, it will fail with
|
||||||
|
// RaftRetryFailureException.
|
||||||
|
Assert.assertTrue((HddsClientUtils.
|
||||||
|
checkForException(blockOutputStream
|
||||||
|
.getIoException()) instanceof RaftRetryFailureException)
|
||||||
|
|| HddsClientUtils.checkForException(
|
||||||
|
blockOutputStream.getIoException()) instanceof GroupMismatchException);
|
||||||
// Make sure the retryCount is reset after the exception is handled
|
// Make sure the retryCount is reset after the exception is handled
|
||||||
Assert.assertTrue(keyOutputStream.getRetryCount() == 0);
|
Assert.assertTrue(keyOutputStream.getRetryCount() == 0);
|
||||||
// now close the stream; it will update the ack length after watchForCommit
|
// now close the stream; it will update the ack length after watchForCommit
|
||||||
@ -524,7 +537,8 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
validateData(keyName, data1);
|
validateData(keyName, data1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test public void testFailureWithPrimeSizedData() throws Exception {
|
@Test
|
||||||
|
public void testFailureWithPrimeSizedData() throws Exception {
|
||||||
XceiverClientMetrics metrics =
|
XceiverClientMetrics metrics =
|
||||||
XceiverClientManager.getXceiverClientMetrics();
|
XceiverClientManager.getXceiverClientMetrics();
|
||||||
long writeChunkCount =
|
long writeChunkCount =
|
||||||
@ -644,7 +658,8 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
validateData(keyName, dataString.concat(dataString).getBytes());
|
validateData(keyName, dataString.concat(dataString).getBytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test public void testExceptionDuringClose() throws Exception {
|
@Test
|
||||||
|
public void testExceptionDuringClose() throws Exception {
|
||||||
XceiverClientMetrics metrics =
|
XceiverClientMetrics metrics =
|
||||||
XceiverClientManager.getXceiverClientMetrics();
|
XceiverClientManager.getXceiverClientMetrics();
|
||||||
long writeChunkCount =
|
long writeChunkCount =
|
||||||
@ -758,7 +773,8 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
validateData(keyName, dataString.concat(dataString).getBytes());
|
validateData(keyName, dataString.concat(dataString).getBytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test public void testWatchForCommitWithSingleNodeRatis() throws Exception {
|
@Test
|
||||||
|
public void testWatchForCommitWithSingleNodeRatis() throws Exception {
|
||||||
XceiverClientMetrics metrics =
|
XceiverClientMetrics metrics =
|
||||||
XceiverClientManager.getXceiverClientMetrics();
|
XceiverClientManager.getXceiverClientMetrics();
|
||||||
long writeChunkCount =
|
long writeChunkCount =
|
||||||
@ -898,7 +914,8 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
validateData(keyName, dataString.concat(dataString).getBytes());
|
validateData(keyName, dataString.concat(dataString).getBytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test public void testDatanodeFailureWithSingleNodeRatis() throws Exception {
|
@Test
|
||||||
|
public void testDatanodeFailureWithSingleNodeRatis() throws Exception {
|
||||||
XceiverClientMetrics metrics =
|
XceiverClientMetrics metrics =
|
||||||
XceiverClientManager.getXceiverClientMetrics();
|
XceiverClientManager.getXceiverClientMetrics();
|
||||||
long writeChunkCount =
|
long writeChunkCount =
|
||||||
@ -1037,7 +1054,8 @@ public class TestBlockOutputStreamWithFailures {
|
|||||||
validateData(keyName, dataString.concat(dataString).getBytes());
|
validateData(keyName, dataString.concat(dataString).getBytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test public void testDatanodeFailureWithPreAllocation() throws Exception {
|
@Test
|
||||||
|
public void testDatanodeFailureWithPreAllocation() throws Exception {
|
||||||
XceiverClientMetrics metrics =
|
XceiverClientMetrics metrics =
|
||||||
XceiverClientManager.getXceiverClientMetrics();
|
XceiverClientManager.getXceiverClientMetrics();
|
||||||
long writeChunkCount =
|
long writeChunkCount =
|
||||||
|
Loading…
x
Reference in New Issue
Block a user