HDFS-2627. Determine DN's view of which NN is active based on heartbeat responses. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1211735 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f39aac60e0
commit
6016e95fee
@ -45,3 +45,5 @@ HDFS-2626. BPOfferService.verifyAndSetNamespaceInfo needs to be synchronized (to
|
|||||||
HDFS-2624. ConfiguredFailoverProxyProvider doesn't correctly stop ProtocolTranslators (todd)
|
HDFS-2624. ConfiguredFailoverProxyProvider doesn't correctly stop ProtocolTranslators (todd)
|
||||||
|
|
||||||
HDFS-2625. TestDfsOverAvroRpc failing after introduction of HeartbeatResponse type (todd)
|
HDFS-2625. TestDfsOverAvroRpc failing after introduction of HeartbeatResponse type (todd)
|
||||||
|
|
||||||
|
HDFS-2627. Determine DN's view of which NN is active based on heartbeat responses (todd)
|
||||||
|
@ -37,14 +37,15 @@
|
|||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand;
|
import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.KeyUpdateCommand;
|
import org.apache.hadoop.hdfs.server.protocol.KeyUpdateCommand;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
|
import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand;
|
import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand;
|
||||||
import org.apache.hadoop.ipc.RPC;
|
import org.apache.hadoop.ipc.RPC;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.base.Joiner;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -75,10 +76,31 @@ class BPOfferService {
|
|||||||
UpgradeManagerDatanode upgradeManager = null;
|
UpgradeManagerDatanode upgradeManager = null;
|
||||||
private final DataNode dn;
|
private final DataNode dn;
|
||||||
|
|
||||||
private BPServiceActor bpServiceToActive;
|
/**
|
||||||
|
* A reference to the BPServiceActor associated with the currently
|
||||||
|
* ACTIVE NN. In the case that all NameNodes are in STANDBY mode,
|
||||||
|
* this can be null. If non-null, this must always refer to a member
|
||||||
|
* of the {@link #bpServices} list.
|
||||||
|
*/
|
||||||
|
private BPServiceActor bpServiceToActive = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The list of all actors for namenodes in this nameservice, regardless
|
||||||
|
* of their active or standby states.
|
||||||
|
*/
|
||||||
private List<BPServiceActor> bpServices =
|
private List<BPServiceActor> bpServices =
|
||||||
new CopyOnWriteArrayList<BPServiceActor>();
|
new CopyOnWriteArrayList<BPServiceActor>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Each time we receive a heartbeat from a NN claiming to be ACTIVE,
|
||||||
|
* we record that NN's most recent transaction ID here, so long as it
|
||||||
|
* is more recent than the previous value. This allows us to detect
|
||||||
|
* split-brain scenarios in which a prior NN is still asserting its
|
||||||
|
* ACTIVE state but with a too-low transaction ID. See HDFS-2627
|
||||||
|
* for details.
|
||||||
|
*/
|
||||||
|
private long lastActiveClaimTxId = -1;
|
||||||
|
|
||||||
BPOfferService(List<InetSocketAddress> nnAddrs, DataNode dn) {
|
BPOfferService(List<InetSocketAddress> nnAddrs, DataNode dn) {
|
||||||
Preconditions.checkArgument(!nnAddrs.isEmpty(),
|
Preconditions.checkArgument(!nnAddrs.isEmpty(),
|
||||||
"Must pass at least one NN.");
|
"Must pass at least one NN.");
|
||||||
@ -87,10 +109,6 @@ class BPOfferService {
|
|||||||
for (InetSocketAddress addr : nnAddrs) {
|
for (InetSocketAddress addr : nnAddrs) {
|
||||||
this.bpServices.add(new BPServiceActor(addr, this));
|
this.bpServices.add(new BPServiceActor(addr, this));
|
||||||
}
|
}
|
||||||
// TODO(HA): currently we just make the first one the initial
|
|
||||||
// active. In reality it should start in an unknown state and then
|
|
||||||
// as we figure out which is active, designate one as such.
|
|
||||||
this.bpServiceToActive = this.bpServices.get(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void refreshNNList(ArrayList<InetSocketAddress> addrs) throws IOException {
|
void refreshNNList(ArrayList<InetSocketAddress> addrs) throws IOException {
|
||||||
@ -109,19 +127,23 @@ void refreshNNList(ArrayList<InetSocketAddress> addrs) throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns true if BP thread has completed initialization of storage
|
* @return true if the service has registered with at least one NameNode.
|
||||||
* and has registered with the corresponding namenode
|
|
||||||
* @return true if initialized
|
|
||||||
*/
|
*/
|
||||||
boolean isInitialized() {
|
boolean isInitialized() {
|
||||||
// TODO(HA) is this right?
|
return bpRegistration != null;
|
||||||
return bpServiceToActive != null && bpServiceToActive.isInitialized();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if there is at least one actor thread running which is
|
||||||
|
* talking to a NameNode.
|
||||||
|
*/
|
||||||
boolean isAlive() {
|
boolean isAlive() {
|
||||||
// TODO: should || all the bp actors probably?
|
for (BPServiceActor actor : bpServices) {
|
||||||
return bpServiceToActive != null &&
|
if (actor.isAlive()) {
|
||||||
bpServiceToActive.isAlive();
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
String getBlockPoolId() {
|
String getBlockPoolId() {
|
||||||
@ -322,7 +344,7 @@ DatanodeRegistration createRegistration() {
|
|||||||
* Called when an actor shuts down. If this is the last actor
|
* Called when an actor shuts down. If this is the last actor
|
||||||
* to shut down, shuts down the whole blockpool in the DN.
|
* to shut down, shuts down the whole blockpool in the DN.
|
||||||
*/
|
*/
|
||||||
void shutdownActor(BPServiceActor actor) {
|
synchronized void shutdownActor(BPServiceActor actor) {
|
||||||
if (bpServiceToActive == actor) {
|
if (bpServiceToActive == actor) {
|
||||||
bpServiceToActive = null;
|
bpServiceToActive = null;
|
||||||
}
|
}
|
||||||
@ -339,7 +361,7 @@ void shutdownActor(BPServiceActor actor) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Deprecated
|
@Deprecated
|
||||||
InetSocketAddress getNNSocketAddress() {
|
synchronized InetSocketAddress getNNSocketAddress() {
|
||||||
// TODO(HA) this doesn't make sense anymore
|
// TODO(HA) this doesn't make sense anymore
|
||||||
return bpServiceToActive.getNNSocketAddress();
|
return bpServiceToActive.getNNSocketAddress();
|
||||||
}
|
}
|
||||||
@ -383,8 +405,61 @@ void reportRemoteBadBlock(DatanodeInfo dnInfo, ExtendedBlock block) {
|
|||||||
* @return a proxy to the active NN
|
* @return a proxy to the active NN
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
DatanodeProtocol getActiveNN() {
|
synchronized DatanodeProtocol getActiveNN() {
|
||||||
return bpServiceToActive.bpNamenode;
|
if (bpServiceToActive != null) {
|
||||||
|
return bpServiceToActive.bpNamenode;
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the BPOS's view of which NN is active, based on a heartbeat
|
||||||
|
* response from one of the actors.
|
||||||
|
*
|
||||||
|
* @param actor the actor which received the heartbeat
|
||||||
|
* @param nnHaState the HA-related heartbeat contents
|
||||||
|
*/
|
||||||
|
synchronized void updateActorStatesFromHeartbeat(
|
||||||
|
BPServiceActor actor,
|
||||||
|
NNHAStatusHeartbeat nnHaState) {
|
||||||
|
final long txid = nnHaState.getTxId();
|
||||||
|
|
||||||
|
final boolean nnClaimsActive =
|
||||||
|
nnHaState.getState() == NNHAStatusHeartbeat.State.ACTIVE;
|
||||||
|
final boolean bposThinksActive = bpServiceToActive == actor;
|
||||||
|
final boolean isMoreRecentClaim = txid > lastActiveClaimTxId;
|
||||||
|
|
||||||
|
if (nnClaimsActive && !bposThinksActive) {
|
||||||
|
LOG.info("Namenode " + actor + " trying to claim ACTIVE state with " +
|
||||||
|
"txid=" + txid);
|
||||||
|
if (!isMoreRecentClaim) {
|
||||||
|
// Split-brain scenario - an NN is trying to claim active
|
||||||
|
// state when a different NN has already claimed it with a higher
|
||||||
|
// txid.
|
||||||
|
LOG.warn("NN " + actor + " tried to claim ACTIVE state at txid=" +
|
||||||
|
txid + " but there was already a more recent claim at txid=" +
|
||||||
|
lastActiveClaimTxId);
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
if (bpServiceToActive == null) {
|
||||||
|
LOG.info("Acknowledging ACTIVE Namenode " + actor);
|
||||||
|
} else {
|
||||||
|
LOG.info("Namenode " + actor + " taking over ACTIVE state from " +
|
||||||
|
bpServiceToActive + " at higher txid=" + txid);
|
||||||
|
}
|
||||||
|
bpServiceToActive = actor;
|
||||||
|
}
|
||||||
|
} else if (!nnClaimsActive && bposThinksActive) {
|
||||||
|
LOG.info("Namenode " + actor + " relinquishing ACTIVE state with " +
|
||||||
|
"txid=" + nnHaState.getTxId());
|
||||||
|
bpServiceToActive = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bpServiceToActive == actor) {
|
||||||
|
assert txid >= lastActiveClaimTxId;
|
||||||
|
lastActiveClaimTxId = txid;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -415,7 +490,17 @@ void triggerBlockReportForTests() throws IOException {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean processCommandFromActor(DatanodeCommand cmd,
|
/**
|
||||||
|
* Run an immediate heartbeat from all actors. Used by tests.
|
||||||
|
*/
|
||||||
|
@VisibleForTesting
|
||||||
|
void triggerHeartbeatForTests() throws IOException {
|
||||||
|
for (BPServiceActor actor : bpServices) {
|
||||||
|
actor.triggerHeartbeatForTests();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized boolean processCommandFromActor(DatanodeCommand cmd,
|
||||||
BPServiceActor actor) throws IOException {
|
BPServiceActor actor) throws IOException {
|
||||||
assert bpServices.contains(actor);
|
assert bpServices.contains(actor);
|
||||||
if (actor == bpServiceToActive) {
|
if (actor == bpServiceToActive) {
|
||||||
|
@ -285,6 +285,14 @@ void triggerBlockReportForTests() throws IOException {
|
|||||||
blockReport();
|
blockReport();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
void triggerHeartbeatForTests() throws IOException {
|
||||||
|
synchronized (receivedAndDeletedBlockList) {
|
||||||
|
lastHeartbeat = 0;
|
||||||
|
receivedAndDeletedBlockList.notifyAll();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Report the list blocks to the Namenode
|
* Report the list blocks to the Namenode
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
@ -420,8 +428,18 @@ private void offerService() throws Exception {
|
|||||||
lastHeartbeat = startTime;
|
lastHeartbeat = startTime;
|
||||||
if (!dn.areHeartbeatsDisabledForTests()) {
|
if (!dn.areHeartbeatsDisabledForTests()) {
|
||||||
HeartbeatResponse resp = sendHeartBeat();
|
HeartbeatResponse resp = sendHeartBeat();
|
||||||
|
assert resp != null;
|
||||||
dn.getMetrics().addHeartbeat(now() - startTime);
|
dn.getMetrics().addHeartbeat(now() - startTime);
|
||||||
|
|
||||||
|
// If the state of this NN has changed (eg STANDBY->ACTIVE)
|
||||||
|
// then let the BPOfferService update itself.
|
||||||
|
//
|
||||||
|
// Important that this happens before processCommand below,
|
||||||
|
// since the first heartbeat to a new active might have commands
|
||||||
|
// that we should actually process.
|
||||||
|
bpos.updateActorStatesFromHeartbeat(
|
||||||
|
this, resp.getNameNodeHaState());
|
||||||
|
|
||||||
long startProcessCommands = now();
|
long startProcessCommands = now();
|
||||||
if (!processCommand(resp.getCommands()))
|
if (!processCommand(resp.getCommands()))
|
||||||
continue;
|
continue;
|
||||||
|
@ -150,11 +150,16 @@
|
|||||||
import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockReportMessage;
|
import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockReportMessage;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.CommitBlockSynchronizationMessage;
|
import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.CommitBlockSynchronizationMessage;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.DataNodeMessage;
|
import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.DataNodeMessage;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
|
import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
|
import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
|
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
|
import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
||||||
@ -309,6 +314,12 @@ private static final void logAuditEvent(UserGroupInformation ugi,
|
|||||||
*/
|
*/
|
||||||
private EditLogTailer editLogTailer = null;
|
private EditLogTailer editLogTailer = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reference to the NN's HAContext object. This is only set once
|
||||||
|
* {@link #startCommonServices(Configuration, HAContext)} is called.
|
||||||
|
*/
|
||||||
|
private HAContext haContext;
|
||||||
|
|
||||||
PendingDataNodeMessages getPendingDataNodeMessages() {
|
PendingDataNodeMessages getPendingDataNodeMessages() {
|
||||||
return pendingDatanodeMessages;
|
return pendingDatanodeMessages;
|
||||||
}
|
}
|
||||||
@ -434,11 +445,13 @@ void stopSecretManager() {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Start services common to both active and standby states
|
* Start services common to both active and standby states
|
||||||
|
* @param haContext
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
void startCommonServices(Configuration conf) throws IOException {
|
void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
|
||||||
this.registerMBean(); // register the MBean for the FSNamesystemState
|
this.registerMBean(); // register the MBean for the FSNamesystemState
|
||||||
writeLock();
|
writeLock();
|
||||||
|
this.haContext = haContext;
|
||||||
try {
|
try {
|
||||||
nnResourceChecker = new NameNodeResourceChecker(conf);
|
nnResourceChecker = new NameNodeResourceChecker(conf);
|
||||||
checkAvailableResources();
|
checkAvailableResources();
|
||||||
@ -2706,12 +2719,28 @@ HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
|
|||||||
cmds = new DatanodeCommand[] {cmd};
|
cmds = new DatanodeCommand[] {cmd};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new HeartbeatResponse(cmds);
|
|
||||||
|
return new HeartbeatResponse(cmds, createHaStatusHeartbeat());
|
||||||
} finally {
|
} finally {
|
||||||
readUnlock();
|
readUnlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private NNHAStatusHeartbeat createHaStatusHeartbeat() {
|
||||||
|
HAState state = haContext.getState();
|
||||||
|
NNHAStatusHeartbeat.State hbState;
|
||||||
|
if (state instanceof ActiveState) {
|
||||||
|
hbState = NNHAStatusHeartbeat.State.ACTIVE;
|
||||||
|
} else if (state instanceof StandbyState) {
|
||||||
|
hbState = NNHAStatusHeartbeat.State.STANDBY;
|
||||||
|
} else {
|
||||||
|
throw new AssertionError("Invalid state: " + state.getClass());
|
||||||
|
}
|
||||||
|
return new NNHAStatusHeartbeat(hbState,
|
||||||
|
Math.max(getFSImage().getLastAppliedTxId(),
|
||||||
|
getFSImage().getEditLog().getLastWrittenTxId()));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether or not there were available resources at the last check of
|
* Returns whether or not there were available resources at the last check of
|
||||||
* resources.
|
* resources.
|
||||||
|
@ -426,7 +426,7 @@ protected void validateConfigurationSettings(final Configuration conf)
|
|||||||
|
|
||||||
/** Start the services common to active and standby states */
|
/** Start the services common to active and standby states */
|
||||||
private void startCommonServices(Configuration conf) throws IOException {
|
private void startCommonServices(Configuration conf) throws IOException {
|
||||||
namesystem.startCommonServices(conf);
|
namesystem.startCommonServices(conf, haContext);
|
||||||
startHttpServer(conf);
|
startHttpServer(conf);
|
||||||
rpcServer.start();
|
rpcServer.start();
|
||||||
plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
|
plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
|
||||||
|
@ -35,18 +35,27 @@ public class HeartbeatResponse implements Writable {
|
|||||||
/** Commands returned from the namenode to the datanode */
|
/** Commands returned from the namenode to the datanode */
|
||||||
private DatanodeCommand[] commands;
|
private DatanodeCommand[] commands;
|
||||||
|
|
||||||
|
/** Information about the current HA-related state of the NN */
|
||||||
|
private NNHAStatusHeartbeat haStatus;
|
||||||
|
|
||||||
public HeartbeatResponse() {
|
public HeartbeatResponse() {
|
||||||
// Empty constructor required for Writable
|
// Empty constructor required for Writable
|
||||||
}
|
}
|
||||||
|
|
||||||
public HeartbeatResponse(DatanodeCommand[] cmds) {
|
public HeartbeatResponse(DatanodeCommand[] cmds,
|
||||||
|
NNHAStatusHeartbeat haStatus) {
|
||||||
commands = cmds;
|
commands = cmds;
|
||||||
|
this.haStatus = haStatus;
|
||||||
}
|
}
|
||||||
|
|
||||||
public DatanodeCommand[] getCommands() {
|
public DatanodeCommand[] getCommands() {
|
||||||
return commands;
|
return commands;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public NNHAStatusHeartbeat getNameNodeHaState() {
|
||||||
|
return haStatus;
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
// Writable
|
// Writable
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
@ -58,6 +67,7 @@ public void write(DataOutput out) throws IOException {
|
|||||||
ObjectWritable.writeObject(out, commands[i], commands[i].getClass(),
|
ObjectWritable.writeObject(out, commands[i], commands[i].getClass(),
|
||||||
null, true);
|
null, true);
|
||||||
}
|
}
|
||||||
|
haStatus.write(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -69,5 +79,7 @@ public void readFields(DataInput in) throws IOException {
|
|||||||
commands[i] = (DatanodeCommand) ObjectWritable.readObject(in,
|
commands[i] = (DatanodeCommand) ObjectWritable.readObject(in,
|
||||||
objectWritable, null);
|
objectWritable, null);
|
||||||
}
|
}
|
||||||
|
haStatus = new NNHAStatusHeartbeat();
|
||||||
|
haStatus.readFields(in);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,73 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.protocol;
|
||||||
|
|
||||||
|
import java.io.DataInput;
|
||||||
|
import java.io.DataOutput;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
|
import org.apache.hadoop.io.Writable;
|
||||||
|
import org.apache.hadoop.io.WritableUtils;
|
||||||
|
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Evolving
|
||||||
|
public class NNHAStatusHeartbeat implements Writable {
|
||||||
|
|
||||||
|
private State state;
|
||||||
|
private long txid = HdfsConstants.INVALID_TXID;
|
||||||
|
|
||||||
|
public NNHAStatusHeartbeat() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public NNHAStatusHeartbeat(State state, long txid) {
|
||||||
|
this.state = state;
|
||||||
|
this.txid = txid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public State getState() {
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getTxId() {
|
||||||
|
return txid;
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////
|
||||||
|
// Writable
|
||||||
|
///////////////////////////////////////////
|
||||||
|
@Override
|
||||||
|
public void write(DataOutput out) throws IOException {
|
||||||
|
WritableUtils.writeEnum(out, state);
|
||||||
|
out.writeLong(txid);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void readFields(DataInput in) throws IOException {
|
||||||
|
state = WritableUtils.readEnum(in, State.class);
|
||||||
|
txid = in.readLong();
|
||||||
|
}
|
||||||
|
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public enum State {
|
||||||
|
ACTIVE,
|
||||||
|
STANDBY;
|
||||||
|
}
|
||||||
|
}
|
@ -31,6 +31,7 @@
|
|||||||
@InterfaceStability.Evolving
|
@InterfaceStability.Evolving
|
||||||
public class HeartbeatResponseWritable implements Writable {
|
public class HeartbeatResponseWritable implements Writable {
|
||||||
private DatanodeCommandWritable[] commands;
|
private DatanodeCommandWritable[] commands;
|
||||||
|
private NNHAStatusHeartbeatWritable haStatus;
|
||||||
|
|
||||||
public HeartbeatResponseWritable() {
|
public HeartbeatResponseWritable() {
|
||||||
// Empty constructor for Writable
|
// Empty constructor for Writable
|
||||||
@ -41,7 +42,8 @@ public HeartbeatResponseWritable(DatanodeCommandWritable[] cmds) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public HeartbeatResponse convert() {
|
public HeartbeatResponse convert() {
|
||||||
return new HeartbeatResponse(DatanodeCommandWritable.convert(commands));
|
return new HeartbeatResponse(DatanodeCommandWritable.convert(commands),
|
||||||
|
NNHAStatusHeartbeatWritable.convert(haStatus));
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
@ -55,6 +57,7 @@ public void write(DataOutput out) throws IOException {
|
|||||||
ObjectWritable.writeObject(out, commands[i], commands[i].getClass(),
|
ObjectWritable.writeObject(out, commands[i], commands[i].getClass(),
|
||||||
null, true);
|
null, true);
|
||||||
}
|
}
|
||||||
|
haStatus.write(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -66,6 +69,8 @@ public void readFields(DataInput in) throws IOException {
|
|||||||
commands[i] = (DatanodeCommandWritable) ObjectWritable.readObject(in,
|
commands[i] = (DatanodeCommandWritable) ObjectWritable.readObject(in,
|
||||||
objectWritable, null);
|
objectWritable, null);
|
||||||
}
|
}
|
||||||
|
haStatus = new NNHAStatusHeartbeatWritable();
|
||||||
|
haStatus.readFields(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static HeartbeatResponseWritable convert(
|
public static HeartbeatResponseWritable convert(
|
||||||
|
@ -0,0 +1,77 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.protocolR23Compatible;
|
||||||
|
|
||||||
|
import java.io.DataInput;
|
||||||
|
import java.io.DataOutput;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State;
|
||||||
|
import org.apache.hadoop.io.Writable;
|
||||||
|
import org.apache.hadoop.io.WritableUtils;
|
||||||
|
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Evolving
|
||||||
|
/**
|
||||||
|
* Response to {@link DatanodeProtocol#sendHeartbeat}
|
||||||
|
*/
|
||||||
|
public class NNHAStatusHeartbeatWritable implements Writable {
|
||||||
|
|
||||||
|
private State state;
|
||||||
|
private long txid = HdfsConstants.INVALID_TXID;
|
||||||
|
|
||||||
|
public NNHAStatusHeartbeatWritable() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public NNHAStatusHeartbeatWritable(State state, long txid) {
|
||||||
|
this.state = state;
|
||||||
|
this.txid = txid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public State getState() {
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getTxId() {
|
||||||
|
return txid;
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////
|
||||||
|
// Writable
|
||||||
|
///////////////////////////////////////////
|
||||||
|
@Override
|
||||||
|
public void write(DataOutput out) throws IOException {
|
||||||
|
WritableUtils.writeEnum(out, state);
|
||||||
|
out.writeLong(txid);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void readFields(DataInput in) throws IOException {
|
||||||
|
state = WritableUtils.readEnum(in, State.class);
|
||||||
|
txid = in.readLong();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static NNHAStatusHeartbeat convert(
|
||||||
|
NNHAStatusHeartbeatWritable haStatus) {
|
||||||
|
return new NNHAStatusHeartbeat(haStatus.getState(), haStatus.getTxId());
|
||||||
|
}
|
||||||
|
}
|
@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
@ -32,9 +33,12 @@
|
|||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
|
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
|
import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
|
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
|
import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
@ -43,6 +47,8 @@
|
|||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.mockito.ArgumentCaptor;
|
import org.mockito.ArgumentCaptor;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
|
import org.mockito.invocation.InvocationOnMock;
|
||||||
|
import org.mockito.stubbing.Answer;
|
||||||
|
|
||||||
import com.google.common.base.Supplier;
|
import com.google.common.base.Supplier;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
@ -63,13 +69,15 @@ public class TestBPOfferService {
|
|||||||
|
|
||||||
private DatanodeProtocol mockNN1;
|
private DatanodeProtocol mockNN1;
|
||||||
private DatanodeProtocol mockNN2;
|
private DatanodeProtocol mockNN2;
|
||||||
|
private NNHAStatusHeartbeat[] mockHaStatuses = new NNHAStatusHeartbeat[2];
|
||||||
|
private int heartbeatCounts[] = new int[2];
|
||||||
private DataNode mockDn;
|
private DataNode mockDn;
|
||||||
private FSDatasetInterface mockFSDataset;
|
private FSDatasetInterface mockFSDataset;
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setupMocks() throws Exception {
|
public void setupMocks() throws Exception {
|
||||||
mockNN1 = setupNNMock();
|
mockNN1 = setupNNMock(0);
|
||||||
mockNN2 = setupNNMock();
|
mockNN2 = setupNNMock(1);
|
||||||
|
|
||||||
// Set up a mock DN with the bare-bones configuration
|
// Set up a mock DN with the bare-bones configuration
|
||||||
// objects, etc.
|
// objects, etc.
|
||||||
@ -92,14 +100,17 @@ public void setupMocks() throws Exception {
|
|||||||
/**
|
/**
|
||||||
* Set up a mock NN with the bare minimum for a DN to register to it.
|
* Set up a mock NN with the bare minimum for a DN to register to it.
|
||||||
*/
|
*/
|
||||||
private DatanodeProtocol setupNNMock() throws Exception {
|
private DatanodeProtocol setupNNMock(int nnIdx) throws Exception {
|
||||||
DatanodeProtocol mock = Mockito.mock(DatanodeProtocol.class);
|
DatanodeProtocol mock = Mockito.mock(DatanodeProtocol.class);
|
||||||
Mockito.doReturn(
|
Mockito.doReturn(
|
||||||
new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID,
|
new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID,
|
||||||
0, HdfsConstants.LAYOUT_VERSION))
|
0, HdfsConstants.LAYOUT_VERSION))
|
||||||
.when(mock).versionRequest();
|
.when(mock).versionRequest();
|
||||||
|
|
||||||
Mockito.doReturn(new HeartbeatResponse(null))
|
Mockito.doReturn(new DatanodeRegistration("fake-node"))
|
||||||
|
.when(mock).registerDatanode(Mockito.any(DatanodeRegistration.class));
|
||||||
|
|
||||||
|
Mockito.doAnswer(new HeartbeatAnswer(nnIdx))
|
||||||
.when(mock).sendHeartbeat(
|
.when(mock).sendHeartbeat(
|
||||||
Mockito.any(DatanodeRegistration.class),
|
Mockito.any(DatanodeRegistration.class),
|
||||||
Mockito.anyLong(),
|
Mockito.anyLong(),
|
||||||
@ -109,10 +120,31 @@ private DatanodeProtocol setupNNMock() throws Exception {
|
|||||||
Mockito.anyInt(),
|
Mockito.anyInt(),
|
||||||
Mockito.anyInt(),
|
Mockito.anyInt(),
|
||||||
Mockito.anyInt());
|
Mockito.anyInt());
|
||||||
|
mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(State.STANDBY, 0);
|
||||||
return mock;
|
return mock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mock answer for heartbeats which returns an empty set of commands
|
||||||
|
* and the HA status for the chosen NN from the
|
||||||
|
* {@link TestBPOfferService#mockHaStatuses} array.
|
||||||
|
*/
|
||||||
|
private class HeartbeatAnswer implements Answer<HeartbeatResponse> {
|
||||||
|
private final int nnIdx;
|
||||||
|
|
||||||
|
public HeartbeatAnswer(int nnIdx) {
|
||||||
|
this.nnIdx = nnIdx;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HeartbeatResponse answer(InvocationOnMock invocation) throws Throwable {
|
||||||
|
heartbeatCounts[nnIdx]++;
|
||||||
|
return new HeartbeatResponse(new DatanodeCommand[0],
|
||||||
|
mockHaStatuses[nnIdx]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test that the BPOS can register to talk to two different NNs,
|
* Test that the BPOS can register to talk to two different NNs,
|
||||||
* sends block reports to both, etc.
|
* sends block reports to both, etc.
|
||||||
@ -205,6 +237,53 @@ public void testNNsFromDifferentClusters() throws Exception {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that the DataNode determines the active NameNode correctly
|
||||||
|
* based on the HA-related information in heartbeat responses.
|
||||||
|
* See HDFS-2627.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testPickActiveNameNode() throws Exception {
|
||||||
|
BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2);
|
||||||
|
bpos.start();
|
||||||
|
try {
|
||||||
|
waitForInitialization(bpos);
|
||||||
|
|
||||||
|
// Should start with neither NN as active.
|
||||||
|
assertNull(bpos.getActiveNN());
|
||||||
|
|
||||||
|
// Have NN1 claim active at txid 1
|
||||||
|
mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 1);
|
||||||
|
waitForHeartbeats(bpos);
|
||||||
|
assertSame(mockNN1, bpos.getActiveNN());
|
||||||
|
|
||||||
|
// NN2 claims active at a higher txid
|
||||||
|
mockHaStatuses[1] = new NNHAStatusHeartbeat(State.ACTIVE, 2);
|
||||||
|
waitForHeartbeats(bpos);
|
||||||
|
assertSame(mockNN2, bpos.getActiveNN());
|
||||||
|
|
||||||
|
// Even after another heartbeat from the first NN, it should
|
||||||
|
// think NN2 is active, since it claimed a higher txid
|
||||||
|
waitForHeartbeats(bpos);
|
||||||
|
assertSame(mockNN2, bpos.getActiveNN());
|
||||||
|
|
||||||
|
// Even if NN2 goes to standby, DN shouldn't reset to talking to NN1,
|
||||||
|
// because NN1's txid is lower than the last active txid. Instead,
|
||||||
|
// it should consider neither active.
|
||||||
|
mockHaStatuses[1] = new NNHAStatusHeartbeat(State.STANDBY, 2);
|
||||||
|
waitForHeartbeats(bpos);
|
||||||
|
assertNull(bpos.getActiveNN());
|
||||||
|
|
||||||
|
// Now if NN1 goes back to a higher txid, it should be considered active
|
||||||
|
mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 3);
|
||||||
|
waitForHeartbeats(bpos);
|
||||||
|
assertSame(mockNN1, bpos.getActiveNN());
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
bpos.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void waitForOneToFail(final BPOfferService bpos)
|
private void waitForOneToFail(final BPOfferService bpos)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
@ -269,6 +348,30 @@ public Boolean get() {
|
|||||||
}, 500, 10000);
|
}, 500, 10000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void waitForHeartbeats(BPOfferService bpos)
|
||||||
|
throws Exception {
|
||||||
|
final int countAtStart[];
|
||||||
|
synchronized (heartbeatCounts) {
|
||||||
|
countAtStart = Arrays.copyOf(
|
||||||
|
heartbeatCounts, heartbeatCounts.length);
|
||||||
|
}
|
||||||
|
bpos.triggerHeartbeatForTests();
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
synchronized (heartbeatCounts) {
|
||||||
|
for (int i = 0; i < countAtStart.length; i++) {
|
||||||
|
if (heartbeatCounts[i] <= countAtStart[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, 200, 10000);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private ReceivedDeletedBlockInfo[] waitForBlockReceived(
|
private ReceivedDeletedBlockInfo[] waitForBlockReceived(
|
||||||
ExtendedBlock fakeBlock,
|
ExtendedBlock fakeBlock,
|
||||||
DatanodeProtocol mockNN) throws Exception {
|
DatanodeProtocol mockNN) throws Exception {
|
||||||
|
Loading…
Reference in New Issue
Block a user