ozone.acl.authorizer.class
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
index ab251cbeec..7b13471534 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
@@ -183,6 +183,13 @@ private OMConfigKeys() {
OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT
= TimeDuration.valueOf(120, TimeUnit.SECONDS);
+ // OM Leader server role check interval
+ public static final String OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_KEY
+ = "ozone.om.ratis.server.role.check.interval";
+ public static final TimeDuration
+ OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT
+ = TimeDuration.valueOf(15, TimeUnit.SECONDS);
+
public static final String OZONE_OM_KERBEROS_KEYTAB_FILE_KEY = "ozone.om."
+ "kerberos.keytab.file";
public static final String OZONE_OM_KERBEROS_PRINCIPAL_KEY = "ozone.om"
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/NotLeaderException.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/NotLeaderException.java
new file mode 100644
index 0000000000..974ab0e5ed
--- /dev/null
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/NotLeaderException.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.om.exceptions;
+
+import java.io.IOException;
+
+/**
+ * Exception thrown by
+ * {@link org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB} when
+ * a read request is received by a non leader OM node.
+ */
+public class NotLeaderException extends IOException {
+
+ private final String currentPeerId;
+ private final String leaderPeerId;
+
+ public NotLeaderException(String currentPeerIdStr) {
+ super("OM " + currentPeerIdStr + " is not the leader. Could not " +
+ "determine the leader node.");
+ this.currentPeerId = currentPeerIdStr;
+ this.leaderPeerId = null;
+ }
+
+ public NotLeaderException(String currentPeerIdStr,
+ String suggestedLeaderPeerIdStr) {
+ super("OM " + currentPeerIdStr + " is not the leader. Suggested leader is "
+ + suggestedLeaderPeerIdStr);
+ this.currentPeerId = currentPeerIdStr;
+ this.leaderPeerId = suggestedLeaderPeerIdStr;
+ }
+
+ public String getSuggestedLeaderNodeId() {
+ return leaderPeerId;
+ }
+}
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java
index 5c1b39fc0a..b4a4857368 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java
@@ -226,8 +226,14 @@ public Class getInterface() {
* not match the current leaderOMNodeId cached by the proxy provider.
*/
public void performFailoverIfRequired(String newLeaderOMNodeId) {
- if (updateLeaderOMNodeId(newLeaderOMNodeId)) {
- LOG.debug("Failing over OM proxy to nodeId: {}", newLeaderOMNodeId);
+ if (newLeaderOMNodeId == null) {
+ LOG.debug("No suggested leader nodeId. Performing failover to next peer" +
+ " node");
+ performFailover(null);
+ } else {
+ if (updateLeaderOMNodeId(newLeaderOMNodeId)) {
+ LOG.debug("Failing over OM proxy to nodeId: {}", newLeaderOMNodeId);
+ }
}
}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OMRatisHelper.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMRatisHelper.java
similarity index 77%
rename from hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OMRatisHelper.java
rename to hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMRatisHelper.java
index 8e4582d660..bc64d6c5a1 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OMRatisHelper.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMRatisHelper.java
@@ -15,7 +15,7 @@
* the License.
*/
-package org.apache.hadoop.ozone.om.ratis;
+package org.apache.hadoop.ozone.om.helpers;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hadoop.conf.Configuration;
@@ -25,8 +25,6 @@
.OMRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
.OMResponse;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Status;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type;
import org.apache.ratis.RaftConfigKeys;
import org.apache.ratis.client.RaftClient;
import org.apache.ratis.conf.RaftProperties;
@@ -54,14 +52,15 @@ private OMRatisHelper() {
/**
* Creates a new RaftClient object.
- * @param rpcType Replication Type
- * @param omId OM id of the client
- * @param group RaftGroup
+ *
+ * @param rpcType Replication Type
+ * @param omId OM id of the client
+ * @param group RaftGroup
* @param retryPolicy Retry policy
* @return RaftClient object
*/
- static RaftClient newRaftClient(RpcType rpcType, String omId, RaftGroup
- group, RetryPolicy retryPolicy, Configuration conf) {
+ public static RaftClient newRaftClient(RpcType rpcType, String omId, RaftGroup
+ group, RetryPolicy retryPolicy, Configuration conf) {
LOG.trace("newRaftClient: {}, leader={}, group={}", rpcType, omId, group);
final RaftProperties properties = new RaftProperties();
RaftConfigKeys.Rpc.setType(properties, rpcType);
@@ -85,36 +84,27 @@ static RaftPeerId getRaftPeerId(String omId) {
return RaftPeerId.valueOf(omId);
}
- static ByteString convertRequestToByteString(OMRequest request) {
+ public static ByteString convertRequestToByteString(OMRequest request) {
byte[] requestBytes = request.toByteArray();
return ByteString.copyFrom(requestBytes);
}
- static OMRequest convertByteStringToOMRequest(ByteString byteString)
+ public static OMRequest convertByteStringToOMRequest(ByteString byteString)
throws InvalidProtocolBufferException {
byte[] bytes = byteString.toByteArray();
return OMRequest.parseFrom(bytes);
}
- static Message convertResponseToMessage(OMResponse response) {
+ public static Message convertResponseToMessage(OMResponse response) {
byte[] requestBytes = response.toByteArray();
return Message.valueOf(ByteString.copyFrom(requestBytes));
}
- static OMResponse getOMResponseFromRaftClientReply(RaftClientReply reply)
- throws InvalidProtocolBufferException {
+ public static OMResponse getOMResponseFromRaftClientReply(
+ RaftClientReply reply) throws InvalidProtocolBufferException {
byte[] bytes = reply.getMessage().getContent().toByteArray();
return OMResponse.newBuilder(OMResponse.parseFrom(bytes))
.setLeaderOMNodeId(reply.getReplierId())
.build();
}
-
- static OMResponse getErrorResponse(Type cmdType, Exception e) {
- return OMResponse.newBuilder()
- .setCmdType(cmdType)
- .setSuccess(false)
- .setMessage(e.getMessage())
- .setStatus(Status.INTERNAL_ERROR)
- .build();
- }
-}
+}
\ No newline at end of file
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java
index 63a656c9f9..c06efdc2c1 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java
@@ -35,6 +35,7 @@
import org.apache.hadoop.ipc.ProtobufHelper;
import org.apache.hadoop.ipc.ProtocolTranslator;
import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.om.exceptions.NotLeaderException;
import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.hadoop.ozone.om.ha.OMFailoverProxyProvider;
import org.apache.hadoop.ozone.om.helpers.KeyValueUtil;
@@ -195,29 +196,49 @@ public OzoneManagerProtocolClientSideTranslatorPB(OzoneConfiguration conf,
private OzoneManagerProtocolPB createRetryProxy(
OMFailoverProxyProvider failoverProxyProvider,
int maxRetries, int maxFailovers, int delayMillis, int maxDelayBase) {
+
RetryPolicy retryPolicyOnNetworkException = RetryPolicies
.failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL,
maxFailovers, maxRetries, delayMillis, maxDelayBase);
+
RetryPolicy retryPolicy = new RetryPolicy() {
@Override
public RetryAction shouldRetry(Exception exception, int retries,
int failovers, boolean isIdempotentOrAtMostOnce)
throws Exception {
- if (exception instanceof EOFException ||
- exception instanceof ServiceException) {
- if (retries < maxRetries && failovers < maxFailovers) {
- return RetryAction.FAILOVER_AND_RETRY;
+
+ if (exception instanceof ServiceException) {
+ Throwable cause = exception.getCause();
+ if (cause instanceof NotLeaderException) {
+ NotLeaderException notLeaderException = (NotLeaderException) cause;
+ omFailoverProxyProvider.performFailoverIfRequired(
+ notLeaderException.getSuggestedLeaderNodeId());
+ return getRetryAction(RetryAction.RETRY, retries, failovers);
} else {
- FAILOVER_PROXY_PROVIDER_LOG.error("Failed to connect to OM. " +
- "Attempted {} retries and {} failovers", retries, failovers);
- return RetryAction.FAIL;
+ return getRetryAction(RetryAction.FAILOVER_AND_RETRY, retries,
+ failovers);
}
+ } else if (exception instanceof EOFException) {
+ return getRetryAction(RetryAction.FAILOVER_AND_RETRY, retries,
+ failovers);
} else {
return retryPolicyOnNetworkException.shouldRetry(
- exception, retries, failovers, isIdempotentOrAtMostOnce);
+ exception, retries, failovers, isIdempotentOrAtMostOnce);
+ }
+ }
+
+ private RetryAction getRetryAction(RetryAction fallbackAction,
+ int retries, int failovers) {
+ if (retries < maxRetries && failovers < maxFailovers) {
+ return fallbackAction;
+ } else {
+ FAILOVER_PROXY_PROVIDER_LOG.error("Failed to connect to OM. " +
+ "Attempted {} retries and {} failovers", retries, failovers);
+ return RetryAction.FAIL;
}
}
};
+
OzoneManagerProtocolPB proxy = (OzoneManagerProtocolPB) RetryProxy.create(
OzoneManagerProtocolPB.class, failoverProxyProvider, retryPolicy);
return proxy;
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
index da8f870650..86a83b78c8 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
@@ -52,8 +52,6 @@
.OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY;
import static org.apache.hadoop.ozone.OzoneConfigKeys
.OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_DEFAULT;
-import static org.apache.hadoop.ozone.OzoneConfigKeys
- .OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY;
import static org.apache.hadoop.ozone.OzoneConfigKeys
.OZONE_CLIENT_RETRY_MAX_ATTEMPTS_KEY;
import static org.apache.hadoop.ozone.OzoneConfigKeys
@@ -75,7 +73,7 @@ public class TestOzoneManagerHA {
public ExpectedException exception = ExpectedException.none();
@Rule
- public Timeout timeout = new Timeout(120_000);
+ public Timeout timeout = new Timeout(300_000);
/**
* Create a MiniDFSCluster for testing.
@@ -93,7 +91,6 @@ public void init() throws Exception {
conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
conf.setInt(OZONE_CLIENT_RETRY_MAX_ATTEMPTS_KEY, 3);
conf.setInt(OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, 3);
- conf.setInt(OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY, 50);
cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf)
.setClusterId(clusterId)
@@ -313,4 +310,41 @@ public void testOMRetryProxy() throws Exception {
"3 retries and 3 failovers"));
}
}
+
+ @Test
+ public void testReadRequest() throws Exception {
+ String volumeName = "volume" + RandomStringUtils.randomNumeric(5);
+ objectStore.createVolume(volumeName);
+
+ OMFailoverProxyProvider omFailoverProxyProvider =
+ objectStore.getClientProxy().getOMProxyProvider();
+ String currentLeaderNodeId = omFailoverProxyProvider
+ .getCurrentProxyOMNodeId();
+
+ // A read request from any proxy should failover to the current leader OM
+ for (int i = 0; i < numOfOMs; i++) {
+ // Failover OMFailoverProxyProvider to OM at index i
+ OzoneManager ozoneManager = cluster.getOzoneManager(i);
+ String omHostName = ozoneManager.getOmRpcServerAddr().getHostName();
+ int rpcPort = ozoneManager.getOmRpcServerAddr().getPort();
+
+ // Get the ObjectStore and FailoverProxyProvider for OM at index i
+ final ObjectStore store = OzoneClientFactory.getRpcClient(
+ omHostName, rpcPort, conf).getObjectStore();
+ final OMFailoverProxyProvider proxyProvider =
+ store.getClientProxy().getOMProxyProvider();
+
+ // Failover to the OM node that the objectStore points to
+ omFailoverProxyProvider.performFailoverIfRequired(
+ ozoneManager.getOMNodId());
+
+ // A read request should result in the proxyProvider failing over to
+ // leader node.
+ OzoneVolume volume = store.getVolume(volumeName);
+ Assert.assertEquals(volumeName, volume.getName());
+
+ Assert.assertEquals(currentLeaderNodeId,
+ proxyProvider.getCurrentProxyOMNodeId());
+ }
+ }
}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index fc4ad01801..326b12c083 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -1236,8 +1236,8 @@ private RPC.Server getRpcServer(OzoneConfiguration conf) throws IOException {
ProtobufRpcEngine.class);
BlockingService omService = newReflectiveBlockingService(
- new OzoneManagerProtocolServerSideTranslatorPB(this, omRatisClient,
- isRatisEnabled));
+ new OzoneManagerProtocolServerSideTranslatorPB(this, omRatisServer,
+ omRatisClient, isRatisEnabled));
return startRpcServer(configuration, omNodeRpcAddr,
OzoneManagerProtocolPB.class, omService,
handlerCount);
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisClient.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisClient.java
index 1b4c6347d9..c9c48a4422 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisClient.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisClient.java
@@ -30,6 +30,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ozone.OmUtils;
import org.apache.hadoop.ozone.om.OMConfigKeys;
+import org.apache.hadoop.ozone.om.helpers.OMRatisHelper;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
.OMRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
index e03293a8f1..a3cde3e9d5 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
@@ -27,8 +27,13 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Optional;
import java.util.UUID;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.StorageUnit;
@@ -41,6 +46,11 @@
import org.apache.ratis.conf.RaftProperties;
import org.apache.ratis.grpc.GrpcConfigKeys;
import org.apache.ratis.netty.NettyConfigKeys;
+import org.apache.ratis.proto.RaftProtos.RoleInfoProto;
+import org.apache.ratis.proto.RaftProtos.RaftPeerRole;
+import org.apache.ratis.protocol.ClientId;
+import org.apache.ratis.protocol.GroupInfoReply;
+import org.apache.ratis.protocol.GroupInfoRequest;
import org.apache.ratis.protocol.RaftGroup;
import org.apache.ratis.protocol.RaftGroupId;
import org.apache.ratis.protocol.RaftPeer;
@@ -50,6 +60,7 @@
import org.apache.ratis.server.RaftServer;
import org.apache.ratis.server.RaftServerConfigKeys;
import org.apache.ratis.statemachine.impl.BaseStateMachine;
+import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.apache.ratis.util.LifeCycle;
import org.apache.ratis.util.SizeInBytes;
import org.apache.ratis.util.TimeDuration;
@@ -69,7 +80,22 @@ public final class OzoneManagerRatisServer {
private final RaftGroupId raftGroupId;
private final RaftGroup raftGroup;
private final RaftPeerId raftPeerId;
+
private final OzoneManagerProtocol ozoneManager;
+ private final ClientId clientId = ClientId.randomId();
+
+ private final ScheduledExecutorService scheduledRoleChecker;
+ private long roleCheckInitialDelayMs = 1000; // 1 second default
+ private long roleCheckIntervalMs;
+ private ReentrantReadWriteLock roleCheckLock = new ReentrantReadWriteLock();
+ private Optional cachedPeerRole = Optional.empty();
+ private Optional cachedLeaderPeerId = Optional.empty();
+
+ private static final AtomicLong CALL_ID_COUNTER = new AtomicLong();
+
+ private static long nextCallId() {
+ return CALL_ID_COUNTER.getAndIncrement() & Long.MAX_VALUE;
+ }
/**
* Returns an OM Ratis server.
@@ -108,6 +134,20 @@ private OzoneManagerRatisServer(Configuration conf, OzoneManagerProtocol om,
.setProperties(serverProperties)
.setStateMachine(getStateMachine(this.raftGroupId))
.build();
+
+ // Run a scheduler to check and update the server role on the leader
+ // periodically
+ this.scheduledRoleChecker = Executors.newSingleThreadScheduledExecutor();
+ this.scheduledRoleChecker.scheduleWithFixedDelay(new Runnable() {
+ @Override
+ public void run() {
+ // Run this check only on the leader OM
+ if (cachedPeerRole.isPresent() &&
+ cachedPeerRole.get() == RaftPeerRole.LEADER) {
+ updateServerRole();
+ }
+ }
+ }, roleCheckInitialDelayMs, roleCheckIntervalMs, TimeUnit.MILLISECONDS);
}
/**
@@ -156,7 +196,11 @@ public RaftGroup getRaftGroup() {
* Returns OzoneManager StateMachine.
*/
private BaseStateMachine getStateMachine(RaftGroupId gid) {
- return new OzoneManagerStateMachine(ozoneManager);
+ return new OzoneManagerStateMachine(this);
+ }
+
+ public OzoneManagerProtocol getOzoneManager() {
+ return ozoneManager;
}
/**
@@ -323,6 +367,19 @@ private RaftProperties newRaftProperties(Configuration conf) {
RaftServerConfigKeys.Rpc.setSlownessTimeout(properties,
nodeFailureTimeout);
+ TimeUnit roleCheckIntervalUnit =
+ OMConfigKeys.OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT
+ .getUnit();
+ long roleCheckIntervalDuration = conf.getTimeDuration(
+ OMConfigKeys.OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_KEY,
+ OMConfigKeys.OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT
+ .getDuration(), nodeFailureTimeoutUnit);
+ this.roleCheckIntervalMs = TimeDuration.valueOf(
+ roleCheckIntervalDuration, roleCheckIntervalUnit)
+ .toLong(TimeUnit.MILLISECONDS);
+ this.roleCheckInitialDelayMs = leaderElectionMinTimeout
+ .toLong(TimeUnit.MILLISECONDS);
+
/**
* TODO: when ratis snapshots are implemented, set snapshot threshold and
* queue size.
@@ -331,6 +388,104 @@ private RaftProperties newRaftProperties(Configuration conf) {
return properties;
}
+ /**
+ * Check the cached leader status.
+ * @return true if cached role is Leader, false otherwise.
+ */
+ private boolean checkCachedPeerRoleIsLeader() {
+ this.roleCheckLock.readLock().lock();
+ try {
+ if (cachedPeerRole.isPresent() &&
+ cachedPeerRole.get() == RaftPeerRole.LEADER) {
+ return true;
+ }
+ return false;
+ } finally {
+ this.roleCheckLock.readLock().unlock();
+ }
+ }
+
+ /**
+ * Check if the current OM node is the leader node.
+ * @return true if Leader, false otherwise.
+ */
+ public boolean isLeader() {
+ if (checkCachedPeerRoleIsLeader()) {
+ return true;
+ }
+
+ // Get the server role from ratis server and update the cached values.
+ updateServerRole();
+
+ // After updating the server role, check and return if leader or not.
+ return checkCachedPeerRoleIsLeader();
+ }
+
+ /**
+ * Get the suggested leader peer id.
+ * @return RaftPeerId of the suggested leader node.
+ */
+ public Optional getCachedLeaderPeerId() {
+ this.roleCheckLock.readLock().lock();
+ try {
+ return cachedLeaderPeerId;
+ } finally {
+ this.roleCheckLock.readLock().unlock();
+ }
+ }
+
+ /**
+ * Get the gorup info (peer role and leader peer id) from Ratis server and
+ * update the OM server role.
+ */
+ public void updateServerRole() {
+ try {
+ GroupInfoReply groupInfo = getGroupInfo();
+ RoleInfoProto roleInfoProto = groupInfo.getRoleInfoProto();
+ RaftPeerRole thisNodeRole = roleInfoProto.getRole();
+
+ if (thisNodeRole.equals(RaftPeerRole.LEADER)) {
+ setServerRole(thisNodeRole, raftPeerId);
+
+ } else if (thisNodeRole.equals(RaftPeerRole.FOLLOWER)) {
+ ByteString leaderNodeId = roleInfoProto.getFollowerInfo()
+ .getLeaderInfo().getId().getId();
+ RaftPeerId leaderPeerId = RaftPeerId.valueOf(leaderNodeId);
+
+ setServerRole(thisNodeRole, leaderPeerId);
+
+ } else {
+ setServerRole(thisNodeRole, null);
+
+ }
+ } catch (IOException e) {
+ LOG.error("Failed to retrieve RaftPeerRole. Setting cached role to " +
+ "{} and resetting leader info.", RaftPeerRole.UNRECOGNIZED, e);
+ setServerRole(null, null);
+ }
+ }
+
+ /**
+ * Set the current server role and the leader peer id.
+ */
+ private void setServerRole(RaftPeerRole currentRole,
+ RaftPeerId leaderPeerId) {
+ this.roleCheckLock.writeLock().lock();
+ try {
+ this.cachedPeerRole = Optional.ofNullable(currentRole);
+ this.cachedLeaderPeerId = Optional.ofNullable(leaderPeerId);
+ } finally {
+ this.roleCheckLock.writeLock().unlock();
+ }
+ }
+
+ private GroupInfoReply getGroupInfo() throws IOException {
+ GroupInfoRequest groupInfoRequest = new GroupInfoRequest(clientId,
+ raftPeerId, raftGroupId, nextCallId());
+ GroupInfoReply groupInfo = server.getGroupInfo(groupInfoRequest);
+ return groupInfo;
+ }
+
public int getServerPort() {
return port;
}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
index 701ac16de1..acbbd34ac8 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java
@@ -20,10 +20,12 @@
import com.google.common.base.Preconditions;
import com.google.protobuf.ServiceException;
import java.io.IOException;
+import java.util.Collection;
import java.util.concurrent.CompletableFuture;
import org.apache.hadoop.ozone.container.common.transport.server.ratis
.ContainerStateMachine;
import org.apache.hadoop.ozone.om.OzoneManager;
+import org.apache.hadoop.ozone.om.helpers.OMRatisHelper;
import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
.OMRequest;
@@ -54,11 +56,15 @@ public class OzoneManagerStateMachine extends BaseStateMachine {
LoggerFactory.getLogger(ContainerStateMachine.class);
private final SimpleStateMachineStorage storage =
new SimpleStateMachineStorage();
+ private final OzoneManagerRatisServer omRatisServer;
+ private final OzoneManagerProtocol ozoneManager;
private final OzoneManagerRequestHandler handler;
private RaftGroupId raftGroupId;
- public OzoneManagerStateMachine(OzoneManagerProtocol om) {
- this.handler = new OzoneManagerRequestHandler(om);
+ public OzoneManagerStateMachine(OzoneManagerRatisServer ratisServer) {
+ this.omRatisServer = ratisServer;
+ this.ozoneManager = omRatisServer.getOzoneManager();
+ this.handler = new OzoneManagerRequestHandler(ozoneManager);
}
/**
@@ -137,6 +143,15 @@ public CompletableFuture query(Message request) {
}
}
+ /**
+ * Notifies the state machine that the raft peer is no longer leader.
+ */
+ @Override
+ public void notifyNotLeader(Collection pendingEntries)
+ throws IOException {
+ omRatisServer.updateServerRole();
+ }
+
/**
* Submits request to OM and returns the response Message.
* @param request OMRequest
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
index 2f1d64d894..395cc42388 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
@@ -17,18 +17,24 @@
package org.apache.hadoop.ozone.protocolPB;
import org.apache.hadoop.hdds.tracing.TracingUtil;
+import org.apache.hadoop.ozone.OmUtils;
+import org.apache.hadoop.ozone.om.exceptions.NotLeaderException;
import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB;
import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisClient;
+import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse;
import com.google.protobuf.RpcController;
import com.google.protobuf.ServiceException;
import io.opentracing.Scope;
+import org.apache.ratis.protocol.RaftPeerId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.Optional;
+
/**
* This class is the server-side translator that forwards requests received on
* {@link OzoneManagerProtocolPB}
@@ -38,6 +44,7 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
OzoneManagerProtocolPB {
private static final Logger LOG = LoggerFactory
.getLogger(OzoneManagerProtocolServerSideTranslatorPB.class);
+ private final OzoneManagerRatisServer omRatisServer;
private final OzoneManagerRatisClient omRatisClient;
private final OzoneManagerRequestHandler handler;
private final boolean isRatisEnabled;
@@ -48,9 +55,10 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
* @param impl OzoneManagerProtocolPB
*/
public OzoneManagerProtocolServerSideTranslatorPB(
- OzoneManagerProtocol impl, OzoneManagerRatisClient ratisClient,
- boolean enableRatis) {
+ OzoneManagerProtocol impl, OzoneManagerRatisServer ratisServer,
+ OzoneManagerRatisClient ratisClient, boolean enableRatis) {
handler = new OzoneManagerRequestHandler(impl);
+ this.omRatisServer = ratisServer;
this.omRatisClient = ratisClient;
this.isRatisEnabled = enableRatis;
}
@@ -68,7 +76,12 @@ public OMResponse submitRequest(RpcController controller,
request.getTraceID());
try {
if (isRatisEnabled) {
- return submitRequestToRatis(request);
+ // Check if the request is a read only request
+ if (OmUtils.isReadOnly(request)) {
+ return submitReadRequestToOM(request);
+ } else {
+ return submitRequestToRatis(request);
+ }
} else {
return submitRequestDirectlyToOM(request);
}
@@ -85,6 +98,32 @@ private OMResponse submitRequestToRatis(OMRequest request)
return omRatisClient.sendCommand(request);
}
+ private OMResponse submitReadRequestToOM(OMRequest request)
+ throws ServiceException {
+ // Check if this OM is the leader.
+ if (omRatisServer.isLeader()) {
+ return handler.handle(request);
+ } else {
+ RaftPeerId raftPeerId = omRatisServer.getRaftPeerId();
+ Optional leaderRaftPeerId = omRatisServer
+ .getCachedLeaderPeerId();
+
+ NotLeaderException notLeaderException;
+ if (leaderRaftPeerId.isPresent()) {
+ notLeaderException = new NotLeaderException(raftPeerId.toString());
+ } else {
+ notLeaderException = new NotLeaderException(
+ raftPeerId.toString(), leaderRaftPeerId.toString());
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(notLeaderException.getMessage());
+ }
+
+ throw new ServiceException(notLeaderException);
+ }
+ }
+
/**
* Submits request directly to OM.
*/