HDFS-2853. HA: NN fails to start if the shared edits dir is marked required. Contributed by Aaron T. Myers.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1238134 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
641f79a325
commit
6be13332db
@ -141,3 +141,5 @@ HDFS-2841. HAAdmin does not work if security is enabled. (atm)
|
|||||||
HDFS-2691. Fixes for pipeline recovery in an HA cluster: report RBW replicas immediately upon pipeline creation. (todd)
|
HDFS-2691. Fixes for pipeline recovery in an HA cluster: report RBW replicas immediately upon pipeline creation. (todd)
|
||||||
|
|
||||||
HDFS-2824. Fix failover when prior NN died just after creating an edit log segment. (atm via todd)
|
HDFS-2824. Fix failover when prior NN died just after creating an edit log segment. (atm via todd)
|
||||||
|
|
||||||
|
HDFS-2853. HA: NN fails to start if the shared edits dir is marked required (atm via eli)
|
||||||
|
@ -865,7 +865,7 @@ synchronized void startLogSegment(final long segmentTxId,
|
|||||||
editLogStream = journalSet.startLogSegment(segmentTxId);
|
editLogStream = journalSet.startLogSegment(segmentTxId);
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
throw new IOException("Unable to start log segment " +
|
throw new IOException("Unable to start log segment " +
|
||||||
segmentTxId + ": no journals successfully started.");
|
segmentTxId + ": too few journals successfully started.", ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
curSegmentTxId = segmentTxId;
|
curSegmentTxId = segmentTxId;
|
||||||
|
@ -37,9 +37,6 @@ final class NameNodeResourcePolicy {
|
|||||||
* required to continue operation.
|
* required to continue operation.
|
||||||
* @return true if and only if there are sufficient NN resources to
|
* @return true if and only if there are sufficient NN resources to
|
||||||
* continue logging edits.
|
* continue logging edits.
|
||||||
* @throws RuntimeException if the number of <bold>configured</bold>
|
|
||||||
* redundant resources is fewer than the minimum number of available
|
|
||||||
* redundant resources.
|
|
||||||
*/
|
*/
|
||||||
static boolean areResourcesAvailable(
|
static boolean areResourcesAvailable(
|
||||||
Collection<? extends CheckableNameNodeResource> resources,
|
Collection<? extends CheckableNameNodeResource> resources,
|
||||||
@ -63,12 +60,6 @@ static boolean areResourcesAvailable(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (redundantResourceCount < minimumRedundantResources) {
|
|
||||||
throw new RuntimeException("Need a minimum of " + minimumRedundantResources
|
|
||||||
+ " for NN to operate but only " + redundantResourceCount
|
|
||||||
+ " are configured.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (redundantResourceCount == 0) {
|
if (redundantResourceCount == 0) {
|
||||||
// If there are no redundant resources, return true if there are any
|
// If there are no redundant resources, return true if there are any
|
||||||
// required resources available.
|
// required resources available.
|
||||||
|
@ -664,7 +664,12 @@ private void createNameNodesAndSetConf(MiniDFSNNTopology nnTopology,
|
|||||||
}
|
}
|
||||||
|
|
||||||
public URI getSharedEditsDir(int minNN, int maxNN) throws IOException {
|
public URI getSharedEditsDir(int minNN, int maxNN) throws IOException {
|
||||||
return fileAsURI(new File(base_dir, "shared-edits-" +
|
return formatSharedEditsDir(base_dir, minNN, maxNN);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static URI formatSharedEditsDir(File baseDir, int minNN, int maxNN)
|
||||||
|
throws IOException {
|
||||||
|
return fileAsURI(new File(baseDir, "shared-edits-" +
|
||||||
minNN + "-through-" + maxNN));
|
minNN + "-through-" + maxNN));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -807,7 +807,7 @@ public void testFailedOpen() throws Exception {
|
|||||||
fail("Did no throw exception on only having a bad dir");
|
fail("Did no throw exception on only having a bad dir");
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
GenericTestUtils.assertExceptionContains(
|
GenericTestUtils.assertExceptionContains(
|
||||||
"no journals successfully started", ioe);
|
"too few journals successfully started", ioe);
|
||||||
} finally {
|
} finally {
|
||||||
logDir.setWritable(true);
|
logDir.setWritable(true);
|
||||||
log.close();
|
log.close();
|
||||||
|
@ -50,13 +50,7 @@ public void testMultipleRedundantResources() {
|
|||||||
assertFalse(testResourceScenario(4, 0, 3, 0, 2));
|
assertFalse(testResourceScenario(4, 0, 3, 0, 2));
|
||||||
assertTrue(testResourceScenario(4, 0, 3, 0, 1));
|
assertTrue(testResourceScenario(4, 0, 3, 0, 1));
|
||||||
assertFalse(testResourceScenario(4, 0, 4, 0, 1));
|
assertFalse(testResourceScenario(4, 0, 4, 0, 1));
|
||||||
try {
|
assertFalse(testResourceScenario(1, 0, 0, 0, 2));
|
||||||
testResourceScenario(1, 0, 0, 0, 2);
|
|
||||||
fail("Should fail if there are more minimum redundant resources than " +
|
|
||||||
"total redundant resources");
|
|
||||||
} catch (RuntimeException rte) {
|
|
||||||
assertTrue(rte.getMessage().startsWith("Need a minimum"));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -0,0 +1,93 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode.ha;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestFailureOfSharedDir {
|
||||||
|
|
||||||
|
private static final Log LOG = LogFactory.getLog(TestFailureOfSharedDir.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that marking the shared edits dir as being "required" causes the NN to
|
||||||
|
* fail if that dir can't be accessed.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFailureOfSharedDir() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
URI sharedEditsUri = MiniDFSCluster.formatSharedEditsDir(
|
||||||
|
new File(MiniDFSCluster.getBaseDirectory()), 0, 1);
|
||||||
|
// Mark the shared edits dir required.
|
||||||
|
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY,
|
||||||
|
sharedEditsUri.toString());
|
||||||
|
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
try {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf)
|
||||||
|
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
||||||
|
.numDataNodes(0)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
assertEquals(sharedEditsUri, cluster.getSharedEditsDir(0, 1));
|
||||||
|
|
||||||
|
cluster.waitActive();
|
||||||
|
cluster.transitionToActive(0);
|
||||||
|
|
||||||
|
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
|
||||||
|
|
||||||
|
assertTrue(fs.mkdirs(new Path("/test1")));
|
||||||
|
|
||||||
|
// Blow away the shared edits dir.
|
||||||
|
FileUtil.fullyDelete(new File(sharedEditsUri));
|
||||||
|
|
||||||
|
NameNode nn0 = cluster.getNameNode(0);
|
||||||
|
try {
|
||||||
|
// Make sure that subsequent operations on the NN fail.
|
||||||
|
nn0.getRpcServer().rollEditLog();
|
||||||
|
fail("Succeeded in rolling edit log despite shared dir being deleted");
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
GenericTestUtils.assertExceptionContains(
|
||||||
|
"Unable to start log segment 4: too few journals successfully started",
|
||||||
|
ioe);
|
||||||
|
LOG.info("Got expected exception", ioe);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user