HDFS-11094. Send back HAState along with NamespaceInfo during a versionRequest as an optional parameter. Contributed by Eric Badger
This commit is contained in:
parent
631f1daee3
commit
ac0d402c6a
@ -42,7 +42,6 @@
|
||||
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.DatanodeRegistrationProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.FinalizeCommandProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.KeyUpdateCommandProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.NNHAStatusHeartbeatProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.ReceivedDeletedBlockInfoProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.RegisterCommandProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.VolumeFailureSummaryProto;
|
||||
@ -66,6 +65,7 @@
|
||||
import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamenodeRegistrationProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamenodeRegistrationProto.NamenodeRoleProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamespaceInfoProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NNHAStatusHeartbeatProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.RecoveringBlockProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.RemoteEditLogManifestProto;
|
||||
import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.RemoteEditLogProto;
|
||||
@ -337,7 +337,8 @@ public static NamespaceInfo convert(NamespaceInfoProto info) {
|
||||
StorageInfoProto storage = info.getStorageInfo();
|
||||
return new NamespaceInfo(storage.getNamespceID(), storage.getClusterID(),
|
||||
info.getBlockPoolID(), storage.getCTime(), info.getBuildVersion(),
|
||||
info.getSoftwareVersion(), info.getCapabilities());
|
||||
info.getSoftwareVersion(), info.getCapabilities(),
|
||||
convert(info.getState()));
|
||||
}
|
||||
|
||||
public static NamenodeCommand convert(NamenodeCommandProto cmd) {
|
||||
@ -743,43 +744,64 @@ public static ReceivedDeletedBlockInfo convert(
|
||||
}
|
||||
|
||||
public static NamespaceInfoProto convert(NamespaceInfo info) {
|
||||
return NamespaceInfoProto.newBuilder()
|
||||
.setBlockPoolID(info.getBlockPoolID())
|
||||
NamespaceInfoProto.Builder builder = NamespaceInfoProto.newBuilder();
|
||||
builder.setBlockPoolID(info.getBlockPoolID())
|
||||
.setBuildVersion(info.getBuildVersion())
|
||||
.setUnused(0)
|
||||
.setStorageInfo(PBHelper.convert((StorageInfo)info))
|
||||
.setSoftwareVersion(info.getSoftwareVersion())
|
||||
.setCapabilities(info.getCapabilities())
|
||||
.build();
|
||||
.setCapabilities(info.getCapabilities());
|
||||
HAServiceState state = info.getState();
|
||||
if(state != null) {
|
||||
builder.setState(convert(info.getState()));
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
public static HAServiceState convert(NNHAStatusHeartbeatProto.State s) {
|
||||
if (s == null) {
|
||||
return null;
|
||||
}
|
||||
switch (s) {
|
||||
case ACTIVE:
|
||||
return HAServiceState.ACTIVE;
|
||||
case STANDBY:
|
||||
return HAServiceState.STANDBY;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unexpected HAServiceStateProto:"
|
||||
+ s);
|
||||
}
|
||||
}
|
||||
|
||||
public static NNHAStatusHeartbeatProto.State convert(HAServiceState s) {
|
||||
if (s == null) {
|
||||
return null;
|
||||
}
|
||||
switch (s) {
|
||||
case ACTIVE:
|
||||
return NNHAStatusHeartbeatProto.State.ACTIVE;
|
||||
case STANDBY:
|
||||
return NNHAStatusHeartbeatProto.State.STANDBY;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unexpected HAServiceState:"
|
||||
+ s);
|
||||
}
|
||||
}
|
||||
|
||||
public static NNHAStatusHeartbeat convert(NNHAStatusHeartbeatProto s) {
|
||||
if (s == null) return null;
|
||||
switch (s.getState()) {
|
||||
case ACTIVE:
|
||||
return new NNHAStatusHeartbeat(HAServiceState.ACTIVE, s.getTxid());
|
||||
case STANDBY:
|
||||
return new NNHAStatusHeartbeat(HAServiceState.STANDBY, s.getTxid());
|
||||
default:
|
||||
throw new IllegalArgumentException("Unexpected NNHAStatusHeartbeat.State:" + s.getState());
|
||||
if (s == null) {
|
||||
return null;
|
||||
}
|
||||
return new NNHAStatusHeartbeat(convert(s.getState()), s.getTxid());
|
||||
}
|
||||
|
||||
public static NNHAStatusHeartbeatProto convert(NNHAStatusHeartbeat hb) {
|
||||
if (hb == null) return null;
|
||||
NNHAStatusHeartbeatProto.Builder builder =
|
||||
NNHAStatusHeartbeatProto.newBuilder();
|
||||
switch (hb.getState()) {
|
||||
case ACTIVE:
|
||||
builder.setState(NNHAStatusHeartbeatProto.State.ACTIVE);
|
||||
break;
|
||||
case STANDBY:
|
||||
builder.setState(NNHAStatusHeartbeatProto.State.STANDBY);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unexpected NNHAStatusHeartbeat.State:" +
|
||||
hb.getState());
|
||||
if (hb == null) {
|
||||
return null;
|
||||
}
|
||||
NNHAStatusHeartbeatProto.Builder builder =
|
||||
NNHAStatusHeartbeatProto.newBuilder();
|
||||
builder.setState(convert(hb.getState()));
|
||||
builder.setTxid(hb.getTxId());
|
||||
return builder.build();
|
||||
}
|
||||
|
@ -307,8 +307,16 @@ DataNode getDataNode() {
|
||||
* verifies that this namespace matches (eg to prevent a misconfiguration
|
||||
* where a StandbyNode from a different cluster is specified)
|
||||
*/
|
||||
void verifyAndSetNamespaceInfo(NamespaceInfo nsInfo) throws IOException {
|
||||
void verifyAndSetNamespaceInfo(BPServiceActor actor, NamespaceInfo nsInfo)
|
||||
throws IOException {
|
||||
writeLock();
|
||||
|
||||
if(nsInfo.getState() == HAServiceState.ACTIVE
|
||||
&& bpServiceToActive == null) {
|
||||
LOG.info("Acknowledging ACTIVE Namenode during handshake" + actor);
|
||||
bpServiceToActive = actor;
|
||||
}
|
||||
|
||||
try {
|
||||
if (this.bpNSInfo == null) {
|
||||
this.bpNSInfo = nsInfo;
|
||||
|
@ -273,7 +273,7 @@ private void connectToNNAndHandshake() throws IOException {
|
||||
// Verify that this matches the other NN in this HA pair.
|
||||
// This also initializes our block pool in the DN if we are
|
||||
// the first NN connection for this BP.
|
||||
bpos.verifyAndSetNamespaceInfo(nsInfo);
|
||||
bpos.verifyAndSetNamespaceInfo(this, nsInfo);
|
||||
|
||||
// Second phase of the handshake with the NN.
|
||||
register(nsInfo);
|
||||
|
@ -1594,7 +1594,7 @@ long getCTime() {
|
||||
NamespaceInfo unprotectedGetNamespaceInfo() {
|
||||
return new NamespaceInfo(getFSImage().getStorage().getNamespaceID(),
|
||||
getClusterId(), getBlockPoolId(),
|
||||
getFSImage().getStorage().getCTime());
|
||||
getFSImage().getStorage().getCTime(), getState());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -4537,6 +4537,10 @@ public int getBlockCapacity() {
|
||||
return blockManager.getCapacity();
|
||||
}
|
||||
|
||||
public HAServiceState getState() {
|
||||
return haContext == null ? null : haContext.getState().getServiceState();
|
||||
}
|
||||
|
||||
@Override // FSNamesystemMBean
|
||||
public String getFSState() {
|
||||
return isInSafeMode() ? "safeMode" : "Operational";
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
||||
@ -44,6 +45,7 @@ public class NamespaceInfo extends StorageInfo {
|
||||
String blockPoolID = ""; // id of the block pool
|
||||
String softwareVersion;
|
||||
long capabilities;
|
||||
HAServiceState state;
|
||||
|
||||
// only authoritative on the server-side to determine advertisement to
|
||||
// clients. enum will update the supported values
|
||||
@ -88,6 +90,14 @@ public NamespaceInfo(int nsID, String clusterID, String bpID,
|
||||
CAPABILITIES_SUPPORTED);
|
||||
}
|
||||
|
||||
public NamespaceInfo(int nsID, String clusterID, String bpID,
|
||||
long cT, String buildVersion, String softwareVersion,
|
||||
long capabilities, HAServiceState st) {
|
||||
this(nsID, clusterID, bpID, cT, buildVersion, softwareVersion,
|
||||
capabilities);
|
||||
this.state = st;
|
||||
}
|
||||
|
||||
// for use by server and/or client
|
||||
public NamespaceInfo(int nsID, String clusterID, String bpID,
|
||||
long cT, String buildVersion, String softwareVersion,
|
||||
@ -106,6 +116,13 @@ public NamespaceInfo(int nsID, String clusterID, String bpID,
|
||||
VersionInfo.getVersion());
|
||||
}
|
||||
|
||||
public NamespaceInfo(int nsID, String clusterID, String bpID,
|
||||
long cT, HAServiceState st) {
|
||||
this(nsID, clusterID, bpID, cT, Storage.getBuildVersion(),
|
||||
VersionInfo.getVersion());
|
||||
this.state = st;
|
||||
}
|
||||
|
||||
public long getCapabilities() {
|
||||
return capabilities;
|
||||
}
|
||||
@ -115,6 +132,11 @@ public void setCapabilities(long capabilities) {
|
||||
this.capabilities = capabilities;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public void setState(HAServiceState state) {
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
public boolean isCapabilitySupported(Capability capability) {
|
||||
Preconditions.checkArgument(capability != Capability.UNKNOWN,
|
||||
"cannot test for unknown capability");
|
||||
@ -134,6 +156,10 @@ public String getSoftwareVersion() {
|
||||
return softwareVersion;
|
||||
}
|
||||
|
||||
public HAServiceState getState() {
|
||||
return state;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(){
|
||||
return super.toString() + ";bpid=" + blockPoolID;
|
||||
|
@ -208,19 +208,6 @@ message HeartbeatRequestProto {
|
||||
optional bool requestFullBlockReportLease = 9 [ default = false ];
|
||||
}
|
||||
|
||||
/**
|
||||
* state - State the NN is in when returning response to the DN
|
||||
* txid - Highest transaction ID this NN has seen
|
||||
*/
|
||||
message NNHAStatusHeartbeatProto {
|
||||
enum State {
|
||||
ACTIVE = 0;
|
||||
STANDBY = 1;
|
||||
}
|
||||
required State state = 1;
|
||||
required uint64 txid = 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* cmds - Commands from namenode to datanode.
|
||||
* haStatus - Status (from an HA perspective) of the NN sending this response
|
||||
|
@ -32,6 +32,7 @@ option java_generate_equals_and_hash = true;
|
||||
package hadoop.hdfs;
|
||||
|
||||
import "hdfs.proto";
|
||||
import "HAServiceProtocol.proto";
|
||||
|
||||
/**
|
||||
* Block access token information
|
||||
@ -101,6 +102,7 @@ message NamespaceInfoProto {
|
||||
required StorageInfoProto storageInfo = 4;// Node information
|
||||
required string softwareVersion = 5; // Software version number (e.g. 2.0.0)
|
||||
optional uint64 capabilities = 6 [default = 0]; // feature flags
|
||||
optional NNHAStatusHeartbeatProto.State state = 7;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -201,3 +203,16 @@ message NamenodeRegistrationProto {
|
||||
required StorageInfoProto storageInfo = 3; // Node information
|
||||
optional NamenodeRoleProto role = 4 [default = NAMENODE]; // Namenode role
|
||||
}
|
||||
|
||||
/**
|
||||
* state - State the NN is in when returning response to the DN
|
||||
* txid - Highest transaction ID this NN has seen
|
||||
*/
|
||||
message NNHAStatusHeartbeatProto {
|
||||
enum State {
|
||||
ACTIVE = 0;
|
||||
STANDBY = 1;
|
||||
}
|
||||
required State state = 1;
|
||||
required uint64 txid = 2;
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertSame;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
@ -799,4 +800,35 @@ private int getStandbyIBRSize(BPOfferService bpos) {
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
@Test
|
||||
public void testNNHAStateUpdateFromVersionRequest() throws Exception {
|
||||
final BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2);
|
||||
Mockito.doReturn(true).when(mockDn).areHeartbeatsDisabledForTests();
|
||||
BPServiceActor actor = bpos.getBPServiceActors().get(0);
|
||||
bpos.start();
|
||||
waitForInitialization(bpos);
|
||||
// Should start with neither NN as active.
|
||||
assertNull(bpos.getActiveNN());
|
||||
|
||||
// getNamespaceInfo() will not include HAServiceState
|
||||
NamespaceInfo nsInfo = mockNN1.versionRequest();
|
||||
bpos.verifyAndSetNamespaceInfo(actor, nsInfo);
|
||||
|
||||
assertNull(bpos.getActiveNN());
|
||||
|
||||
// Change mock so getNamespaceInfo() will include HAServiceState
|
||||
Mockito.doReturn(new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID, 0,
|
||||
HAServiceState.ACTIVE)).when(mockNN1).versionRequest();
|
||||
|
||||
// Update the bpos NamespaceInfo
|
||||
nsInfo = mockNN1.versionRequest();
|
||||
bpos.verifyAndSetNamespaceInfo(actor, nsInfo);
|
||||
|
||||
assertNotNull(bpos.getActiveNN());
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -33,6 +33,7 @@
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||
@ -43,6 +44,7 @@
|
||||
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
|
||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
||||
import org.apache.hadoop.hdfs.server.namenode.top.TopAuditLogger;
|
||||
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
||||
import org.junit.After;
|
||||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
@ -154,6 +156,25 @@ public void testReplQueuesActiveAfterStartupSafemode() throws IOException, Inter
|
||||
+ "safemode 2nd time", bm.isPopulatingReplQueues());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHAStateInNamespaceInfo() throws IOException {
|
||||
Configuration conf = new Configuration();
|
||||
|
||||
FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
|
||||
FSImage fsImage = Mockito.mock(FSImage.class);
|
||||
Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
|
||||
NNStorage nnStorage = Mockito.mock(NNStorage.class);
|
||||
Mockito.when(fsImage.getStorage()).thenReturn(nnStorage);
|
||||
|
||||
FSNamesystem fsNamesystem = new FSNamesystem(conf, fsImage);
|
||||
FSNamesystem fsn = Mockito.spy(fsNamesystem);
|
||||
Mockito.when(fsn.getState()).thenReturn(
|
||||
HAServiceProtocol.HAServiceState.ACTIVE);
|
||||
|
||||
NamespaceInfo nsInfo = fsn.unprotectedGetNamespaceInfo();
|
||||
assertNotNull(nsInfo.getState());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReset() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
|
Loading…
Reference in New Issue
Block a user