HDFS-16547. [SBN read] Namenode in safe mode should not be transfer to observer state (#4201)

Signed-off-by: Erik Krogen <xkrogen@apache.org>
Reviewed-by: Zengqiang Xu <xuzq_zander@163.com>
This commit is contained in:
litao 2022-11-22 02:14:07 +08:00 committed by GitHub
parent dc2fba45fe
commit 8f971b0e54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 67 additions and 9 deletions

View File

@ -2009,6 +2009,9 @@ synchronized void transitionToStandby() throws IOException {
synchronized void transitionToObserver() throws IOException {
String operationName = "transitionToObserver";
namesystem.checkSuperuserPrivilege(operationName);
if (notBecomeActiveInSafemode && isInSafeMode()) {
throw new ServiceFailedException(getRole() + " still not leave safemode");
}
if (!haEnabled) {
throw new ServiceFailedException("HA for namenode is not enabled");
}

View File

@ -247,7 +247,7 @@ private boolean checkSupportObserver(HAServiceTarget target) {
}
private int transitionToObserver(final CommandLine cmd)
throws IOException, ServiceFailedException {
throws IOException {
String[] argv = cmd.getArgs();
if (argv.length != 1) {
errOut.println("transitionToObserver: incorrect number of arguments");
@ -262,8 +262,13 @@ private int transitionToObserver(final CommandLine cmd)
if (!checkManualStateManagementOK(target)) {
return -1;
}
HAServiceProtocol proto = target.getProxy(getConf(), 0);
HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
try {
HAServiceProtocol proto = target.getProxy(getConf(), 0);
HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
} catch (ServiceFailedException e) {
errOut.println("transitionToObserver failed! " + e.getLocalizedMessage());
return -1;
}
return 0;
}

View File

@ -3725,7 +3725,7 @@
<name>dfs.ha.nn.not-become-active-in-safemode</name>
<value>false</value>
<description>
This will prevent safe mode namenodes to become active while other standby
When set to true, this prevents namenodes in safe mode from becoming active or
observer while other standby namenodes might be ready to serve requests.
</description>
</property>

View File

@ -316,12 +316,14 @@ The order in which you set these configurations is unimportant, but the values y
<value>hdfs://mycluster</value>
</property>
* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
* **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode namenodes from becoming active or observer
Controls whether a namenode may become active while it is in safemode. When it is
set to true, a namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
auto failover is on, or will throw an exception to fail the transition to
active if auto failover is off. For example:
active if auto failover is off. Likewise, when this configuration is set to true,
an attempt to transition a namenode that is in safemode to observer state fails
with an exception. For example:
<property>
<name>dfs.ha.nn.not-become-active-in-safemode</name>

View File

@ -376,12 +376,14 @@ The order in which you set these configurations is unimportant, but the values y
<value>/path/to/journal/node/local/data</value>
</property>
* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
* **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode namenodes from becoming active or observer
Controls whether a namenode may become active while it is in safemode. When it is
set to true, a namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
auto failover is on, or will throw an exception to fail the transition to
active if auto failover is off. For example:
active if auto failover is off. Likewise, when this configuration is set to true,
an attempt to transition a namenode that is in safemode to observer state fails
with an exception. For example:
<property>
<name>dfs.ha.nn.not-become-active-in-safemode</name>

View File

@ -977,4 +977,26 @@ public void testTransitionToActiveWhenSafeMode() throws Exception {
() -> miniCluster.transitionToActive(0));
}
}
/**
 * With DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE enabled, asking a NameNode
 * that is in safe mode to become observer must fail with a
 * ServiceFailedException.
 */
@Test
public void testTransitionToObserverWhenSafeMode() throws Exception {
  Configuration config = new Configuration();
  config.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
  File baseDir = new File(GenericTestUtils.getRandomizedTempPath());
  MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(config, baseDir)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(1);
  try (MiniDFSCluster miniCluster = builder.build()) {
    miniCluster.waitActive();
    // Put both NameNodes of the HA pair into standby first ...
    for (int nnIndex = 0; nnIndex < 2; nnIndex++) {
      miniCluster.transitionToStandby(nnIndex);
    }
    // ... then force each of them into safe mode.
    NameNode[] nameNodes = {
        miniCluster.getNameNode(0), miniCluster.getNameNode(1)};
    for (NameNode nn : nameNodes) {
      NameNodeAdapter.enterSafeMode(nn, false);
    }
    // The observer transition of nn0 has to be rejected while in safe mode.
    LambdaTestUtils.intercept(
        ServiceFailedException.class,
        "NameNode still not leave safemode",
        () -> miniCluster.transitionToObserver(0));
  }
}
}

View File

@ -17,10 +17,12 @@
*/
package org.apache.hadoop.hdfs.tools;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
@ -70,6 +72,7 @@ public class TestDFSHAAdminMiniCluster {
@Before
public void setup() throws IOException {
conf = new Configuration();
conf.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0)
.build();
@ -161,7 +164,28 @@ public void testObserverIllegalTransition() throws Exception {
assertEquals(-1, runTool("-transitionToActive", "nn1"));
assertFalse(nnode1.isActiveState());
}
/**
 * Tests that a NameNode in safe mode cannot be transitioned to observer
 * through the admin tool, and that the same transition succeeds once the
 * NameNode has left safe mode.
 *
 * <p>A "yes" answer is supplied on {@code System.in} before each run of the
 * tool (presumably consumed by the {@code -forcemanual} confirmation prompt).
 * The original {@code System.in} is restored afterwards so the replacement
 * stream does not leak into other tests running in the same JVM.
 */
@Test
public void testObserverTransitionInSafeMode() throws Exception {
  // Encode explicitly instead of relying on the platform default charset.
  final byte[] confirmation =
      "yes\n".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  final java.io.InputStream originalIn = System.in;
  try {
    NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
    DFSHAAdmin admin = new DFSHAAdmin();
    admin.setConf(conf);

    // In safe mode the transition must be rejected with exit code -1.
    System.setIn(new ByteArrayInputStream(confirmation));
    int result = admin.run(
        new String[]{"-transitionToObserver", "-forcemanual", "nn1"});
    assertEquals("State transition returned: " + result, -1, result);

    NameNodeAdapter.leaveSafeMode(cluster.getNameNode(0));

    // Out of safe mode the very same command must succeed.
    System.setIn(new ByteArrayInputStream(confirmation));
    int result1 = admin.run(
        new String[]{"-transitionToObserver", "-forcemanual", "nn1"});
    assertEquals("State transition returned: " + result1, 0, result1);
    assertFalse(cluster.getNameNode(0).isInSafeMode());
  } finally {
    // System.in is process-wide state; always put the original back.
    System.setIn(originalIn);
  }
}
@Test
public void testTryFailoverToSafeMode() throws Exception {
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,