HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor goes for infinite loop. Contributed by Rushabh Shah.
This commit is contained in:
parent
cbea5d2db4
commit
ea11590aad
@ -814,6 +814,9 @@ Release 2.7.1 - UNRELEASED
|
|||||||
HDFS-8254. Standby namenode doesn't process DELETED_BLOCK if the add block
|
HDFS-8254. Standby namenode doesn't process DELETED_BLOCK if the add block
|
||||||
request is in edit log. (Rushabh S Shah via kihwal)
|
request is in edit log. (Rushabh S Shah via kihwal)
|
||||||
|
|
||||||
|
HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor
|
||||||
|
goes for infinite loop (Rushabh S Shah via kihwal)
|
||||||
|
|
||||||
Release 2.7.0 - 2015-04-20
|
Release 2.7.0 - 2015-04-20
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -43,6 +44,9 @@ public void reportTo(DatanodeProtocolClientSideTranslatorPB bpNamenode,
|
|||||||
DatanodeRegistration bpRegistration) throws BPServiceActorActionException {
|
DatanodeRegistration bpRegistration) throws BPServiceActorActionException {
|
||||||
try {
|
try {
|
||||||
bpNamenode.errorReport(bpRegistration, errorCode, errorMessage);
|
bpNamenode.errorReport(bpRegistration, errorCode, errorMessage);
|
||||||
|
} catch (RemoteException re) {
|
||||||
|
DataNode.LOG.info("trySendErrorReport encountered RemoteException "
|
||||||
|
+ "errorMessage: " + errorMessage + " errorCode: " + errorCode, re);
|
||||||
} catch(IOException e) {
|
} catch(IOException e) {
|
||||||
throw new BPServiceActorActionException("Error reporting "
|
throw new BPServiceActorActionException("Error reporting "
|
||||||
+ "an error to namenode: ");
|
+ "an error to namenode: ");
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ReportBadBlockAction is an instruction issued by {{BPOfferService}} to
|
* ReportBadBlockAction is an instruction issued by {{BPOfferService}} to
|
||||||
@ -59,6 +60,9 @@ public void reportTo(DatanodeProtocolClientSideTranslatorPB bpNamenode,
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
bpNamenode.reportBadBlocks(locatedBlock);
|
bpNamenode.reportBadBlocks(locatedBlock);
|
||||||
|
} catch (RemoteException re) {
|
||||||
|
DataNode.LOG.info("reportBadBlock encountered RemoteException for "
|
||||||
|
+ "block: " + block , re);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new BPServiceActorActionException("Failed to report bad block "
|
throw new BPServiceActorActionException("Failed to report bad block "
|
||||||
+ block + " to namenode: ");
|
+ block + " to namenode: ");
|
||||||
|
@ -55,6 +55,9 @@
|
|||||||
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
|
import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
|
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
|
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
|
||||||
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
|
import org.apache.hadoop.ipc.StandbyException;
|
||||||
|
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.test.PathUtils;
|
import org.apache.hadoop.test.PathUtils;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
@ -621,4 +624,55 @@ public Void answer(InvocationOnMock invocation) throws Throwable {
|
|||||||
bpos.stop();
|
bpos.stop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This test case doesn't add the reportBadBlock request to
|
||||||
|
* {@link BPServiceActor#bpThreadEnqueue} when the Standby namenode throws
|
||||||
|
* {@link StandbyException}
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testReportBadBlocksWhenNNThrowsStandbyException()
|
||||||
|
throws Exception {
|
||||||
|
BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2);
|
||||||
|
bpos.start();
|
||||||
|
try {
|
||||||
|
waitForInitialization(bpos);
|
||||||
|
// Should start with neither NN as active.
|
||||||
|
assertNull(bpos.getActiveNN());
|
||||||
|
// Have NN1 claim active at txid 1
|
||||||
|
mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1);
|
||||||
|
bpos.triggerHeartbeatForTests();
|
||||||
|
// Now mockNN1 is acting like active namenode and mockNN2 as Standby
|
||||||
|
assertSame(mockNN1, bpos.getActiveNN());
|
||||||
|
// Return nothing when active Active Namenode calls reportBadBlocks
|
||||||
|
Mockito.doNothing().when(mockNN1).reportBadBlocks
|
||||||
|
(Mockito.any(LocatedBlock[].class));
|
||||||
|
|
||||||
|
RemoteException re = new RemoteException(StandbyException.class.
|
||||||
|
getName(), "Operation category WRITE is not supported in state "
|
||||||
|
+ "standby", RpcErrorCodeProto.ERROR_APPLICATION);
|
||||||
|
// Return StandbyException wrapped in RemoteException when Standby NN
|
||||||
|
// calls reportBadBlocks
|
||||||
|
Mockito.doThrow(re).when(mockNN2).reportBadBlocks
|
||||||
|
(Mockito.any(LocatedBlock[].class));
|
||||||
|
|
||||||
|
bpos.reportBadBlocks(FAKE_BLOCK, mockFSDataset.getVolume(FAKE_BLOCK)
|
||||||
|
.getStorageID(), mockFSDataset.getVolume(FAKE_BLOCK)
|
||||||
|
.getStorageType());
|
||||||
|
// Send heartbeat so that the BpServiceActor can report bad block to
|
||||||
|
// namenode
|
||||||
|
bpos.triggerHeartbeatForTests();
|
||||||
|
Mockito.verify(mockNN2, Mockito.times(1))
|
||||||
|
.reportBadBlocks(Mockito.any(LocatedBlock[].class));
|
||||||
|
|
||||||
|
// Trigger another heartbeat, this will send reportBadBlock again if it
|
||||||
|
// is present in the queue.
|
||||||
|
bpos.triggerHeartbeatForTests();
|
||||||
|
Mockito.verify(mockNN2, Mockito.times(1))
|
||||||
|
.reportBadBlocks(Mockito.any(LocatedBlock[].class));
|
||||||
|
} finally {
|
||||||
|
bpos.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user