HDFS-16547. [SBN read] Namenode in safe mode should not be transferred to observer state (#4201)
Signed-off-by: Erik Krogen <xkrogen@apache.org> Reviewed-by: Zengqiang Xu <xuzq_zander@163.com>
This commit is contained in:
parent
dc2fba45fe
commit
8f971b0e54
@ -2009,6 +2009,9 @@ synchronized void transitionToStandby() throws IOException {
|
|||||||
synchronized void transitionToObserver() throws IOException {
|
synchronized void transitionToObserver() throws IOException {
|
||||||
String operationName = "transitionToObserver";
|
String operationName = "transitionToObserver";
|
||||||
namesystem.checkSuperuserPrivilege(operationName);
|
namesystem.checkSuperuserPrivilege(operationName);
|
||||||
|
if (notBecomeActiveInSafemode && isInSafeMode()) {
|
||||||
|
throw new ServiceFailedException(getRole() + " still not leave safemode");
|
||||||
|
}
|
||||||
if (!haEnabled) {
|
if (!haEnabled) {
|
||||||
throw new ServiceFailedException("HA for namenode is not enabled");
|
throw new ServiceFailedException("HA for namenode is not enabled");
|
||||||
}
|
}
|
||||||
|
@ -247,7 +247,7 @@ private boolean checkSupportObserver(HAServiceTarget target) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private int transitionToObserver(final CommandLine cmd)
|
private int transitionToObserver(final CommandLine cmd)
|
||||||
throws IOException, ServiceFailedException {
|
throws IOException {
|
||||||
String[] argv = cmd.getArgs();
|
String[] argv = cmd.getArgs();
|
||||||
if (argv.length != 1) {
|
if (argv.length != 1) {
|
||||||
errOut.println("transitionToObserver: incorrect number of arguments");
|
errOut.println("transitionToObserver: incorrect number of arguments");
|
||||||
@ -262,8 +262,13 @@ private int transitionToObserver(final CommandLine cmd)
|
|||||||
if (!checkManualStateManagementOK(target)) {
|
if (!checkManualStateManagementOK(target)) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
HAServiceProtocol proto = target.getProxy(getConf(), 0);
|
try {
|
||||||
HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
|
HAServiceProtocol proto = target.getProxy(getConf(), 0);
|
||||||
|
HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
|
||||||
|
} catch (ServiceFailedException e) {
|
||||||
|
errOut.println("transitionToObserver failed! " + e.getLocalizedMessage());
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3725,7 +3725,7 @@
|
|||||||
<name>dfs.ha.nn.not-become-active-in-safemode</name>
|
<name>dfs.ha.nn.not-become-active-in-safemode</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
<description>
|
<description>
|
||||||
This will prevent safe mode namenodes to become active while other standby
|
This will prevent safe mode namenodes from becoming active or observer while other standby
|
||||||
namenodes might be ready to serve requests when it is set to true.
|
namenodes might be ready to serve requests when it is set to true.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
@ -316,12 +316,14 @@ The order in which you set these configurations is unimportant, but the values y
|
|||||||
<value>hdfs://mycluster</value>
|
<value>hdfs://mycluster</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
|
* **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode namenodes from becoming active or observer
|
||||||
|
|
||||||
Whether allow namenode to become active when it is in safemode, when it is
|
Whether allow namenode to become active when it is in safemode, when it is
|
||||||
set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
|
set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
|
||||||
auto failover is on, or will throw exception to fail the transition to
|
auto failover is on, or will throw exception to fail the transition to
|
||||||
active if auto failover is off. For example:
|
active if auto failover is off. If you transition namenode to observer state
|
||||||
|
while it is in safemode and this configuration is set to true, the namenode will throw an exception
|
||||||
|
to fail the transition to observer. For example:
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.ha.nn.not-become-active-in-safemode</name>
|
<name>dfs.ha.nn.not-become-active-in-safemode</name>
|
||||||
|
@ -376,12 +376,14 @@ The order in which you set these configurations is unimportant, but the values y
|
|||||||
<value>/path/to/journal/node/local/data</value>
|
<value>/path/to/journal/node/local/data</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
|
* **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode namenodes from becoming active or observer
|
||||||
|
|
||||||
Whether allow namenode to become active when it is in safemode, when it is
|
Whether allow namenode to become active when it is in safemode, when it is
|
||||||
set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
|
set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
|
||||||
auto failover is on, or will throw exception to fail the transition to
|
auto failover is on, or will throw exception to fail the transition to
|
||||||
active if auto failover is off. For example:
|
active if auto failover is off. If you transition namenode to observer state
|
||||||
|
while it is in safemode and this configuration is set to true, the namenode will throw an exception
|
||||||
|
to fail the transition to observer. For example:
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.ha.nn.not-become-active-in-safemode</name>
|
<name>dfs.ha.nn.not-become-active-in-safemode</name>
|
||||||
|
@ -977,4 +977,26 @@ public void testTransitionToActiveWhenSafeMode() throws Exception {
|
|||||||
() -> miniCluster.transitionToActive(0));
|
() -> miniCluster.transitionToActive(0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTransitionToObserverWhenSafeMode() throws Exception {
|
||||||
|
Configuration config = new Configuration();
|
||||||
|
config.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
|
||||||
|
try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(config,
|
||||||
|
new File(GenericTestUtils.getRandomizedTempPath()))
|
||||||
|
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
||||||
|
.numDataNodes(1)
|
||||||
|
.build()) {
|
||||||
|
miniCluster.waitActive();
|
||||||
|
miniCluster.transitionToStandby(0);
|
||||||
|
miniCluster.transitionToStandby(1);
|
||||||
|
NameNode namenode0 = miniCluster.getNameNode(0);
|
||||||
|
NameNode namenode1 = miniCluster.getNameNode(1);
|
||||||
|
NameNodeAdapter.enterSafeMode(namenode0, false);
|
||||||
|
NameNodeAdapter.enterSafeMode(namenode1, false);
|
||||||
|
LambdaTestUtils.intercept(ServiceFailedException.class,
|
||||||
|
"NameNode still not leave safemode",
|
||||||
|
() -> miniCluster.transitionToObserver(0));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,10 +17,12 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.tools;
|
package org.apache.hadoop.hdfs.tools;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -70,6 +72,7 @@ public class TestDFSHAAdminMiniCluster {
|
|||||||
@Before
|
@Before
|
||||||
public void setup() throws IOException {
|
public void setup() throws IOException {
|
||||||
conf = new Configuration();
|
conf = new Configuration();
|
||||||
|
conf.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
|
||||||
cluster = new MiniDFSCluster.Builder(conf)
|
cluster = new MiniDFSCluster.Builder(conf)
|
||||||
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0)
|
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0)
|
||||||
.build();
|
.build();
|
||||||
@ -161,7 +164,28 @@ public void testObserverIllegalTransition() throws Exception {
|
|||||||
assertEquals(-1, runTool("-transitionToActive", "nn1"));
|
assertEquals(-1, runTool("-transitionToActive", "nn1"));
|
||||||
assertFalse(nnode1.isActiveState());
|
assertFalse(nnode1.isActiveState());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that a Namenode in safe mode cannot be transitioned to observer.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testObserverTransitionInSafeMode() throws Exception {
|
||||||
|
NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
|
||||||
|
DFSHAAdmin admin = new DFSHAAdmin();
|
||||||
|
admin.setConf(conf);
|
||||||
|
System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
|
||||||
|
int result = admin.run(
|
||||||
|
new String[]{"-transitionToObserver", "-forcemanual", "nn1"});
|
||||||
|
assertEquals("State transition returned: " + result, -1, result);
|
||||||
|
|
||||||
|
NameNodeAdapter.leaveSafeMode(cluster.getNameNode(0));
|
||||||
|
System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
|
||||||
|
int result1 = admin.run(
|
||||||
|
new String[]{"-transitionToObserver", "-forcemanual", "nn1"});
|
||||||
|
assertEquals("State transition returned: " + result1, 0, result1);
|
||||||
|
assertFalse(cluster.getNameNode(0).isInSafeMode());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTryFailoverToSafeMode() throws Exception {
|
public void testTryFailoverToSafeMode() throws Exception {
|
||||||
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
|
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
|
||||||
|
Loading…
Reference in New Issue
Block a user