HDFS-2914. HA: Standby should not enter safemode when resources are low. Contributed by Vinay.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1347895 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
072ae08a00
commit
b98e26af58
@ -300,6 +300,9 @@ Branch-2 ( Unreleased changes )
|
|||||||
HDFS-3485. DataTransferThrottler will over-throttle when currentTimeMillis
|
HDFS-3485. DataTransferThrottler will over-throttle when currentTimeMillis
|
||||||
jumps (Andy Isaacson via todd)
|
jumps (Andy Isaacson via todd)
|
||||||
|
|
||||||
|
HDFS-2914. HA: Standby should not enter safemode when resources are low.
|
||||||
|
(Vinay via atm)
|
||||||
|
|
||||||
BREAKDOWN OF HDFS-3042 SUBTASKS
|
BREAKDOWN OF HDFS-3042 SUBTASKS
|
||||||
|
|
||||||
HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
|
HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
|
||||||
|
@ -572,8 +572,6 @@ void startCommonServices(Configuration conf, HAContext haContext) throws IOExcep
|
|||||||
!safeMode.isPopulatingReplQueues();
|
!safeMode.isPopulatingReplQueues();
|
||||||
setBlockTotal();
|
setBlockTotal();
|
||||||
blockManager.activate(conf);
|
blockManager.activate(conf);
|
||||||
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
|
|
||||||
nnrmthread.start();
|
|
||||||
} finally {
|
} finally {
|
||||||
writeUnlock();
|
writeUnlock();
|
||||||
}
|
}
|
||||||
@ -590,7 +588,6 @@ void stopCommonServices() {
|
|||||||
writeLock();
|
writeLock();
|
||||||
try {
|
try {
|
||||||
if (blockManager != null) blockManager.close();
|
if (blockManager != null) blockManager.close();
|
||||||
if (nnrmthread != null) nnrmthread.interrupt();
|
|
||||||
} finally {
|
} finally {
|
||||||
writeUnlock();
|
writeUnlock();
|
||||||
}
|
}
|
||||||
@ -644,6 +641,10 @@ void startActiveServices() throws IOException {
|
|||||||
}
|
}
|
||||||
leaseManager.startMonitor();
|
leaseManager.startMonitor();
|
||||||
startSecretManagerIfNecessary();
|
startSecretManagerIfNecessary();
|
||||||
|
|
||||||
|
// The ResourceMonitor is required only on the active NN. See HDFS-2914.
|
||||||
|
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
|
||||||
|
nnrmthread.start();
|
||||||
} finally {
|
} finally {
|
||||||
writeUnlock();
|
writeUnlock();
|
||||||
}
|
}
|
||||||
@ -666,6 +667,10 @@ void stopActiveServices() {
|
|||||||
if (leaseManager != null) {
|
if (leaseManager != null) {
|
||||||
leaseManager.stopMonitor();
|
leaseManager.stopMonitor();
|
||||||
}
|
}
|
||||||
|
if (nnrmthread != null) {
|
||||||
|
((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
|
||||||
|
nnrmthread.interrupt();
|
||||||
|
}
|
||||||
if (dir != null && dir.fsImage != null) {
|
if (dir != null && dir.fsImage != null) {
|
||||||
if (dir.fsImage.editLog != null) {
|
if (dir.fsImage.editLog != null) {
|
||||||
dir.fsImage.editLog.close();
|
dir.fsImage.editLog.close();
|
||||||
@ -3193,10 +3198,11 @@ void checkAvailableResources() {
|
|||||||
* acceptable levels, this daemon will cause the NN to exit safe mode.
|
* acceptable levels, this daemon will cause the NN to exit safe mode.
|
||||||
*/
|
*/
|
||||||
class NameNodeResourceMonitor implements Runnable {
|
class NameNodeResourceMonitor implements Runnable {
|
||||||
|
boolean shouldNNRmRun = true;
|
||||||
@Override
|
@Override
|
||||||
public void run () {
|
public void run () {
|
||||||
try {
|
try {
|
||||||
while (fsRunning) {
|
while (fsRunning && shouldNNRmRun) {
|
||||||
checkAvailableResources();
|
checkAvailableResources();
|
||||||
if(!nameNodeHasResourcesAvailable()) {
|
if(!nameNodeHasResourcesAvailable()) {
|
||||||
String lowResourcesMsg = "NameNode low on available disk space. ";
|
String lowResourcesMsg = "NameNode low on available disk space. ";
|
||||||
@ -3217,7 +3223,11 @@ public void run () {
|
|||||||
FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
|
FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
public void stopMonitor() {
|
||||||
|
shouldNNRmRun = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public FSImage getFSImage() {
|
public FSImage getFSImage() {
|
||||||
return dir.fsImage;
|
return dir.fsImage;
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode.ha;
|
package org.apache.hadoop.hdfs.server.namenode.ha;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
@ -127,6 +129,7 @@ public void testSharedDirsComeFirstInEditsList() throws Exception {
|
|||||||
@Test
|
@Test
|
||||||
public void testFailureOfSharedDir() throws Exception {
|
public void testFailureOfSharedDir() throws Exception {
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
|
conf.setLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 2000);
|
||||||
|
|
||||||
// The shared edits dir will automatically be marked required.
|
// The shared edits dir will automatically be marked required.
|
||||||
MiniDFSCluster cluster = null;
|
MiniDFSCluster cluster = null;
|
||||||
@ -151,6 +154,15 @@ public void testFailureOfSharedDir() throws Exception {
|
|||||||
assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w",
|
assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w",
|
||||||
true));
|
true));
|
||||||
|
|
||||||
|
Thread.sleep(conf.getLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
|
||||||
|
DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT) * 2);
|
||||||
|
|
||||||
|
NameNode nn1 = cluster.getNameNode(1);
|
||||||
|
assertTrue(nn1.isStandbyState());
|
||||||
|
assertFalse(
|
||||||
|
"StandBy NameNode should not go to SafeMode on resource unavailability",
|
||||||
|
nn1.isInSafeMode());
|
||||||
|
|
||||||
NameNode nn0 = cluster.getNameNode(0);
|
NameNode nn0 = cluster.getNameNode(0);
|
||||||
nn0.getNamesystem().getFSImage().getEditLog().getJournalSet()
|
nn0.getNamesystem().getFSImage().getEditLog().getJournalSet()
|
||||||
.setRuntimeForTesting(mockRuntime);
|
.setRuntimeForTesting(mockRuntime);
|
||||||
|
Loading…
Reference in New Issue
Block a user