HDFS-2430. The number of failed or low-resource volumes the NN can tolerate should be configurable. Contributed by Aaron T. Myers

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1211650 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Aaron Myers 2011-12-07 21:47:13 +00:00
parent c17bb83644
commit a27adf3de4
15 changed files with 607 additions and 132 deletions

View File

@ -21,6 +21,9 @@ Trunk (unreleased changes)
HDFS-2636. Implement protobuf service for ClientDatanodeProtocol. (suresh)
HDFS-2430. The number of failed or low-resource volumes the NN can tolerate
should be configurable. (atm)
IMPROVEMENTS
HADOOP-7524 Change RPC to allow multiple protocols including multiple

View File

@ -129,6 +129,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final boolean DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT = true;
public static final String DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY = "dfs.namenode.num.checkpoints.retained";
public static final int DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT = 2;
public static final String DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum";
public static final int DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1;
public static final String DFS_LIST_LIMIT = "dfs.ls.limit";
public static final int DFS_LIST_LIMIT_DEFAULT = 1000;
@ -162,6 +164,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_NAME_DIR_KEY = "dfs.namenode.name.dir";
public static final String DFS_NAMENODE_EDITS_DIR_KEY = "dfs.namenode.edits.dir";
public static final String DFS_NAMENODE_EDITS_PLUGIN_PREFIX = "dfs.namenode.edits.journal-plugin";
public static final String DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY = "dfs.namenode.edits.dir.required";
public static final String DFS_CLIENT_READ_PREFETCH_SIZE_KEY = "dfs.client.read.prefetch.size";
public static final String DFS_CLIENT_RETRY_WINDOW_BASE= "dfs.client.retry.window.base";
public static final String DFS_METRICS_SESSION_ID_KEY = "dfs.metrics.session-id";
@ -304,6 +307,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_DU_RESERVED_KEY = "dfs.namenode.resource.du.reserved";
public static final long DFS_NAMENODE_DU_RESERVED_DEFAULT = 1024 * 1024 * 100; // 100 MB
public static final String DFS_NAMENODE_CHECKED_VOLUMES_KEY = "dfs.namenode.resource.checked.volumes";
public static final String DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_KEY = "dfs.namenode.resource.checked.volumes.minimum";
public static final int DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_DEFAULT = 1;
public static final String DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY = "dfs.web.authentication.kerberos.principal";
public static final String DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY = "dfs.web.authentication.kerberos.keytab";
public static final String DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY = "dfs.block.local-path-access.user";

View File

@ -68,14 +68,28 @@ public static URI stringAsURI(String s) throws IOException {
}
/**
* Converts the passed File to a URI.
*
* Converts the passed File to a URI. This method trims the trailing slash if
* one is appended because the underlying file is in fact a directory that
* exists.
*
* @param f the file to convert
* @return the resulting URI
* @throws IOException
* @return the resulting URI
* @throws IOException
*/
public static URI fileAsURI(File f) throws IOException {
return f.getCanonicalFile().toURI();
URI u = f.getCanonicalFile().toURI();
// trim the trailing slash, if it's present
if (u.getPath().endsWith("/")) {
String uriAsString = u.toString();
try {
u = new URI(uriAsString.substring(0, uriAsString.length() - 1));
} catch (URISyntaxException e) {
throw new IOException(e);
}
}
return u;
}
/**

View File

@ -0,0 +1,45 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import org.apache.hadoop.classification.InterfaceAudience;
/**
* Implementers of this interface represent a NN resource whose availability
* can be checked. A resource can be either "required" or "redundant". All
* required resources must be available for the NN to continue operating. The
* NN will continue to operate as long as at least the configured minimum
* number of redundant resources (one, by default) is available.
*/
@InterfaceAudience.Private
interface CheckableNameNodeResource {

  /**
   * Reports whether the NN cannot operate without this resource.
   *
   * @return true if and only if this resource is required for NN operation;
   *         false means the resource is merely redundant.
   */
  boolean isRequired();

  /**
   * Reports whether this resource can currently be used.
   *
   * @return true if and only if this resource is available right now.
   */
  boolean isResourceAvailable();
}

View File

@ -22,10 +22,10 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.lang.reflect.Constructor;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@ -77,7 +77,7 @@ private enum State {
private State state = State.UNINITIALIZED;
//initialize
final private JournalSet journalSet;
private JournalSet journalSet;
private EditLogOutputStream editLogStream = null;
// a monotonically increasing counter that represents transactionIds.
@ -111,6 +111,8 @@ private enum State {
private NNStorage storage;
private Configuration conf;
private Collection<URI> editsDirs;
private static class TransactionId {
public long txid;
@ -127,19 +129,22 @@ protected synchronized TransactionId initialValue() {
}
};
final private Collection<URI> editsDirs;
/**
* Construct FSEditLog with default configuration, taking editDirs from NNStorage
*
* @param storage Storage object used by namenode
*/
@VisibleForTesting
FSEditLog(NNStorage storage) {
this(new Configuration(), storage, Collections.<URI>emptyList());
FSEditLog(NNStorage storage) throws IOException {
Configuration conf = new Configuration();
// Make sure the edits dirs are set in the provided configuration object.
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
StringUtils.join(storage.getEditsDirectories(), ","));
init(conf, storage, FSNamesystem.getNamespaceEditsDirs(conf));
}
/**
* Constructor for FSEditLog. Add underlying journals are constructed, but
* Constructor for FSEditLog. Underlying journals are constructed, but
* no streams are opened until open() is called.
*
* @param conf The namenode configuration
@ -147,36 +152,35 @@ protected synchronized TransactionId initialValue() {
* @param editsDirs List of journals to use
*/
FSEditLog(Configuration conf, NNStorage storage, Collection<URI> editsDirs) {
this.conf = conf;
init(conf, storage, editsDirs);
}
private void init(Configuration conf, NNStorage storage, Collection<URI> editsDirs) {
isSyncRunning = false;
this.conf = conf;
this.storage = storage;
metrics = NameNode.getNameNodeMetrics();
lastPrintTime = now();
// If this list is empty, an error will be thrown on first use
// of the editlog, as no journals will exist
this.editsDirs = Lists.newArrayList(editsDirs);
if (editsDirs.isEmpty()) {
// if this is the case, no edit dirs have been explictly configured
// image dirs are to be used for edits too
try {
editsDirs = Lists.newArrayList(storage.getEditsDirectories());
} catch (IOException ioe) {
// cannot get list from storage, so the empty editsDirs
// will be assigned. an error will be thrown on first use
// of the editlog, as no journals will exist
}
this.editsDirs = editsDirs;
} else {
this.editsDirs = Lists.newArrayList(editsDirs);
}
int minimumRedundantJournals = conf.getInt(
DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY,
DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT);
this.journalSet = new JournalSet();
journalSet = new JournalSet(minimumRedundantJournals);
for (URI u : this.editsDirs) {
boolean required = FSNamesystem.getRequiredNamespaceEditsDirs(conf)
.contains(u);
if (u.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) {
StorageDirectory sd = storage.getStorageDirectory(u);
if (sd != null) {
journalSet.add(new FileJournalManager(sd));
journalSet.add(new FileJournalManager(sd), required);
}
} else {
journalSet.add(createJournal(u));
journalSet.add(createJournal(u), required);
}
}
@ -442,7 +446,7 @@ public void logSync() {
}
editLogStream.setReadyToFlush();
} catch (IOException e) {
LOG.fatal("Could not sync any journal to persistent storage. "
LOG.fatal("Could not sync enough journals to persistent storage. "
+ "Unsynced transactions: " + (txid - synctxid),
new Exception());
runtime.exit(1);
@ -464,7 +468,7 @@ public void logSync() {
}
} catch (IOException ex) {
synchronized (this) {
LOG.fatal("Could not sync any journal to persistent storage. "
LOG.fatal("Could not sync enough journals to persistent storage. "
+ "Unsynced transactions: " + (txid - synctxid), new Exception());
runtime.exit(1);
}
@ -917,7 +921,7 @@ synchronized void registerBackupNode(
LOG.info("Registering new backup node: " + bnReg);
BackupJournalManager bjm = new BackupJournalManager(bnReg, nnReg);
journalSet.add(bjm);
journalSet.add(bjm, true);
}
synchronized void releaseBackupStream(NamenodeRegistration registration)

View File

@ -738,7 +738,7 @@ void saveFSImage(SaveNamespaceContext context, StorageDirectory sd)
* FSImageSaver assumes that it was launched from a thread that holds
* FSNamesystem lock and waits for the execution of FSImageSaver thread
* to finish.
* This way we are guraranteed that the namespace is not being updated
* This way we are guaranteed that the namespace is not being updated
* while multiple instances of FSImageSaver are traversing it
* and writing it out.
*/

View File

@ -33,6 +33,7 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
@ -430,6 +431,21 @@ void activate(Configuration conf) throws IOException {
public static Collection<URI> getNamespaceDirs(Configuration conf) {
return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
}
/**
 * Resolve the edits directories for the namespace.
 *
 * @param conf the configuration to read the directories from.
 * @return the directories configured under
 *         <code>DFS_NAMENODE_EDITS_DIR_KEY</code>, or, when none are
 *         configured there, the namespace (image) directories.
 */
public static Collection<URI> getNamespaceEditsDirs(Configuration conf) {
  Collection<URI> configuredEditsDirs =
      getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY);
  // An empty result means no edits dirs were explicitly configured, in which
  // case the image dirs double as the edits dirs.
  return configuredEditsDirs.isEmpty()
      ? getNamespaceDirs(conf)
      : configuredEditsDirs;
}
/**
 * Look up the edits directories that have been configured as required.
 * The value is read from <code>DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY</code>
 * through the same helper used for the other storage directory settings.
 *
 * @param conf the configuration to read the directories from.
 * @return the directories configured under the required-edits-dirs key.
 */
public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
return getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY);
}
private static Collection<URI> getStorageDirs(Configuration conf,
String propertyName) {
@ -461,10 +477,6 @@ private static Collection<URI> getStorageDirs(Configuration conf,
return Util.stringCollectionAsURIs(dirNames);
}
public static Collection<URI> getNamespaceEditsDirs(Configuration conf) {
return getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY);
}
@Override
public void readLock() {
this.fsLock.readLock().lock();

View File

@ -50,17 +50,16 @@ public class JournalSet implements JournalManager {
*
* If a Journal gets disabled due to an error writing to its
* stream, then the stream will be aborted and set to null.
*
* This should be used outside JournalSet only for testing.
*/
@VisibleForTesting
static class JournalAndStream {
static class JournalAndStream implements CheckableNameNodeResource {
private final JournalManager journal;
private boolean disabled = false;
private EditLogOutputStream stream;
private boolean required = false;
public JournalAndStream(JournalManager manager) {
public JournalAndStream(JournalManager manager, boolean required) {
this.journal = manager;
this.required = required;
}
public void startLogSegment(long txId) throws IOException {
@ -132,9 +131,24 @@ private boolean isDisabled() {
private void setDisabled(boolean disabled) {
this.disabled = disabled;
}
@Override
public boolean isResourceAvailable() {
return !isDisabled();
}
@Override
public boolean isRequired() {
return required;
}
}
private List<JournalAndStream> journals = Lists.newArrayList();
final int minimumRedundantJournals;
JournalSet(int minimumRedundantResources) {
this.minimumRedundantJournals = minimumRedundantResources;
}
@Override
public EditLogOutputStream startLogSegment(final long txId) throws IOException {
@ -232,16 +246,15 @@ public long getNumberOfTransactions(long fromTxnId) throws IOException {
}
/**
* Returns true if there are no journals or all are disabled.
* @return True if no journals or all are disabled.
* Returns true if there are no journals, all redundant journals are disabled,
* or any required journals are disabled.
*
* @return True if there are no journals, all redundant journals are disabled,
* or any required journals are disabled.
*/
public boolean isEmpty() {
for (JournalAndStream jas : journals) {
if (!jas.isDisabled()) {
return false;
}
}
return true;
return !NameNodeResourcePolicy.areResourcesAvailable(journals,
minimumRedundantJournals);
}
/**
@ -292,9 +305,11 @@ private void mapJournalsAndReportErrors(
}
}
disableAndReportErrorOnJournals(badJAS);
if (badJAS.size() >= journals.size()) {
LOG.error("Error: "+status+" failed for all journals");
throw new IOException(status+" failed on all the journals");
if (!NameNodeResourcePolicy.areResourcesAvailable(journals,
minimumRedundantJournals)) {
String message = status + " failed for too many journals";
LOG.error("Error: " + message);
throw new IOException(message);
}
}
@ -450,8 +465,9 @@ List<JournalManager> getJournalManagers() {
return jList;
}
void add(JournalManager j) {
journals.add(new JournalAndStream(j));
void add(JournalManager j, boolean required) {
JournalAndStream jas = new JournalAndStream(j, required);
journals.add(jas);
}
void remove(JournalManager j) {

View File

@ -27,6 +27,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DF;
import org.apache.hadoop.hdfs.DFSConfigKeys;
@ -40,37 +41,80 @@
*
* NameNodeResourceChecker provides a method -
* <code>hasAvailableDiskSpace</code> - which will return true if and only if
* the NameNode has disk space available on all volumes which are configured to
* be checked. Volumes containing file system name/edits dirs are added by
* default, and arbitrary extra volumes may be configured as well.
* the NameNode has disk space available on all required volumes, and on at
* least the configured minimum number of redundant volumes. Volumes containing
* file system edits dirs are added by default, and arbitrary extra volumes may
* be configured as well.
*/
public class NameNodeResourceChecker {
@InterfaceAudience.Private
class NameNodeResourceChecker {
private static final Log LOG = LogFactory.getLog(NameNodeResourceChecker.class.getName());
// Space (in bytes) reserved per volume.
private long duReserved;
private final Configuration conf;
private Map<String, DF> volumes;
private Map<String, CheckedVolume> volumes;
private int minimumRedundantVolumes;
/**
 * A single volume whose free space is compared against the reserved amount
 * (<code>duReserved</code>) of the enclosing checker. The volume is identified
 * by the file system name reported by {@link DF} for the directory it was
 * created from.
 */
@VisibleForTesting
class CheckedVolume implements CheckableNameNodeResource {
  // All state is fixed at construction time.
  private final DF df;
  private final boolean required;
  private final String volume;

  /**
   * @param dirToCheck a directory located on the volume to be checked.
   * @param required whether the volume is required rather than redundant.
   * @throws IOException if thrown while setting up or querying the
   *         underlying {@link DF}.
   */
  public CheckedVolume(File dirToCheck, boolean required)
      throws IOException {
    df = new DF(dirToCheck, conf);
    this.required = required;
    volume = df.getFilesystem();
  }

  /**
   * @return the file system name that identifies this volume.
   */
  public String getVolume() {
    return volume;
  }

  @Override
  public boolean isRequired() {
    return required;
  }

  /**
   * Compare the space currently available on this volume with the reserved
   * amount. A warning is logged whenever the volume is below the threshold.
   *
   * @return true if the available space is at least the reserved amount.
   */
  @Override
  public boolean isResourceAvailable() {
    long availableSpace = df.getAvailable();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Space available on volume '" + volume + "' is "
          + availableSpace);
    }
    if (availableSpace < duReserved) {
      LOG.warn("Space available on volume '" + volume + "' is "
          + availableSpace +
          ", which is below the configured reserved amount " + duReserved);
      return false;
    } else {
      return true;
    }
  }

  /**
   * Note that building the string performs a fresh availability check via
   * {@link #isResourceAvailable()}, including any logging that check does.
   */
  @Override
  public String toString() {
    return "volume: " + volume + " required: " + required +
        " resource available: " + isResourceAvailable();
  }
}
/**
* Create a NameNodeResourceChecker, which will check the name dirs and edits
* dirs set in <code>conf</code>.
*
* @param conf
* @throws IOException
* Create a NameNodeResourceChecker, which will check the edits dirs and any
* additional dirs to check set in <code>conf</code>.
*/
public NameNodeResourceChecker(Configuration conf) throws IOException {
this.conf = conf;
volumes = new HashMap<String, DF>();
volumes = new HashMap<String, CheckedVolume>();
duReserved = conf.getLong(DFSConfigKeys.DFS_NAMENODE_DU_RESERVED_KEY,
DFSConfigKeys.DFS_NAMENODE_DU_RESERVED_DEFAULT);
Collection<URI> extraCheckedVolumes = Util.stringCollectionAsURIs(conf
.getTrimmedStringCollection(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_KEY));
addDirsToCheck(FSNamesystem.getNamespaceDirs(conf));
Collection<URI> localEditDirs = Collections2.filter(
FSNamesystem.getNamespaceEditsDirs(conf),
@ -82,70 +126,86 @@ public boolean apply(URI input) {
return false;
}
});
addDirsToCheck(localEditDirs);
addDirsToCheck(extraCheckedVolumes);
// Add all the local edits dirs, marking some as required if they are
// configured as such.
for (URI editsDirToCheck : localEditDirs) {
addDirToCheck(editsDirToCheck,
FSNamesystem.getRequiredNamespaceEditsDirs(conf).contains(
editsDirToCheck));
}
// All extra checked volumes are marked "required"
for (URI extraDirToCheck : extraCheckedVolumes) {
addDirToCheck(extraDirToCheck, true);
}
minimumRedundantVolumes = conf.getInt(
DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_KEY,
DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_DEFAULT);
}
/**
* Add the passed-in directories to the list of volumes to check.
* Add the volume of the passed-in directory to the list of volumes to check.
* If <code>required</code> is true, and this volume is already present, but
* is marked redundant, it will be marked required. If the volume is already
* present but marked required then this method is a no-op.
*
* @param directoriesToCheck
* The directories whose volumes will be checked for available space.
* @throws IOException
* @param directoryToCheck
* The directory whose volume will be checked for available space.
*/
private void addDirsToCheck(Collection<URI> directoriesToCheck)
private void addDirToCheck(URI directoryToCheck, boolean required)
throws IOException {
for (URI directoryUri : directoriesToCheck) {
File dir = new File(directoryUri.getPath());
if (!dir.exists()) {
throw new IOException("Missing directory "+dir.getAbsolutePath());
}
DF df = new DF(dir, conf);
volumes.put(df.getFilesystem(), df);
File dir = new File(directoryToCheck.getPath());
if (!dir.exists()) {
throw new IOException("Missing directory "+dir.getAbsolutePath());
}
CheckedVolume newVolume = new CheckedVolume(dir, required);
CheckedVolume volume = volumes.get(newVolume.getVolume());
if (volume == null || (volume != null && !volume.isRequired())) {
volumes.put(newVolume.getVolume(), newVolume);
}
}
/**
* Return true if disk space is available on at least one of the configured
* volumes.
* redundant volumes, and all of the configured required volumes.
*
* @return True if the configured amount of disk space is available on at
* least one volume, false otherwise.
* @throws IOException
* least one redundant volume and all of the required volumes, false
* otherwise.
*/
boolean hasAvailableDiskSpace()
throws IOException {
return getVolumesLowOnSpace().size() < volumes.size();
return NameNodeResourcePolicy.areResourcesAvailable(volumes.values(),
minimumRedundantVolumes);
}
/**
* Return the set of directories which are low on space.
*
* @return the set of directories whose free space is below the threshold.
* @throws IOException
*/
@VisibleForTesting
Collection<String> getVolumesLowOnSpace() throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Going to check the following volumes disk space: " + volumes);
}
Collection<String> lowVolumes = new ArrayList<String>();
for (DF volume : volumes.values()) {
long availableSpace = volume.getAvailable();
String fileSystem = volume.getFilesystem();
if (LOG.isDebugEnabled()) {
LOG.debug("Space available on volume '" + fileSystem + "' is " + availableSpace);
}
if (availableSpace < duReserved) {
LOG.warn("Space available on volume '" + fileSystem + "' is "
+ availableSpace +
", which is below the configured reserved amount " + duReserved);
lowVolumes.add(volume.getFilesystem());
}
for (CheckedVolume volume : volumes.values()) {
lowVolumes.add(volume.getVolume());
}
return lowVolumes;
}
@VisibleForTesting
void setVolumes(Map<String, DF> volumes) {
void setVolumes(Map<String, CheckedVolume> volumes) {
this.volumes = volumes;
}
@VisibleForTesting
void setMinimumReduntdantVolumes(int minimumRedundantVolumes) {
this.minimumRedundantVolumes = minimumRedundantVolumes;
}
}

View File

@ -0,0 +1,81 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.util.Collection;
import org.apache.hadoop.classification.InterfaceAudience;
/**
* Given a set of checkable resources, this class is capable of determining
* whether sufficient resources are available for the NN to continue operating.
*/
@InterfaceAudience.Private
final class NameNodeResourcePolicy {

  /** This class only hosts a static helper, so it is never instantiated. */
  private NameNodeResourcePolicy() {
  }

  /**
   * Return true if and only if there are sufficient NN
   * resources to continue logging edits.
   * <p>
   * Every required resource must be available, and at least
   * <code>minimumRedundantResources</code> of the redundant (non-required)
   * resources must be available. If there are no redundant resources at all,
   * the result is true only when at least one required resource is present.
   *
   * @param resources the collection of resources to check.
   * @param minimumRedundantResources the minimum number of redundant resources
   *        required to continue operation.
   * @return true if and only if there are sufficient NN resources to
   *         continue logging edits.
   * @throws RuntimeException if fewer redundant resources are
   *         <b>configured</b> than <code>minimumRedundantResources</code>.
   *         An unavailable required resource is detected first and yields
   *         <code>false</code> instead.
   */
  static boolean areResourcesAvailable(
      Collection<? extends CheckableNameNodeResource> resources,
      int minimumRedundantResources) {

    int requiredResourceCount = 0;
    int redundantResourceCount = 0;
    int disabledRedundantResourceCount = 0;
    for (CheckableNameNodeResource resource : resources) {
      if (resource.isRequired()) {
        requiredResourceCount++;
        if (!resource.isResourceAvailable()) {
          // Short circuit - a required resource is not available.
          return false;
        }
      } else {
        redundantResourceCount++;
        if (!resource.isResourceAvailable()) {
          disabledRedundantResourceCount++;
        }
      }
    }

    // A minimum that can never be met is a configuration error, not a
    // transient lack of resources.
    if (redundantResourceCount < minimumRedundantResources) {
      throw new RuntimeException("Need a minimum of "
          + minimumRedundantResources
          + " redundant resources for NN to operate but only "
          + redundantResourceCount + " are configured.");
    }

    if (redundantResourceCount == 0) {
      // If there are no redundant resources, return true if there are any
      // required resources available.
      return requiredResourceCount > 0;
    } else {
      int availableRedundantResourceCount =
          redundantResourceCount - disabledRedundantResourceCount;
      return availableRedundantResourceCount >= minimumRedundantResources;
    }
  }
}

View File

@ -47,7 +47,7 @@ public class TestClusterId {
private String getClusterId(Configuration config) throws IOException {
// see if cluster id not empty.
Collection<URI> dirsToFormat = FSNamesystem.getNamespaceDirs(config);
Collection<URI> editsToFormat = new ArrayList<URI>(0);
Collection<URI> editsToFormat = FSNamesystem.getNamespaceEditsDirs(config);
FSImage fsImage = new FSImage(config, dirsToFormat, editsToFormat);
Iterator<StorageDirectory> sdit =

View File

@ -21,28 +21,32 @@
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import java.io.File;
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.verification.VerificationMode;
public class TestEditLogJournalFailures {
private int editsPerformed = 0;
private Configuration conf;
private MiniDFSCluster cluster;
private FileSystem fs;
private Runtime runtime;
@ -53,8 +57,13 @@ public class TestEditLogJournalFailures {
*/
@Before
public void setUpMiniCluster() throws IOException {
conf = new HdfsConfiguration();
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
setUpMiniCluster(new HdfsConfiguration(), true);
}
public void setUpMiniCluster(Configuration conf, boolean manageNameDfsDirs)
throws IOException {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.manageNameDfsDirs(manageNameDfsDirs).build();
cluster.waitActive();
fs = cluster.getFileSystem();
@ -64,11 +73,13 @@ public void setUpMiniCluster() throws IOException {
cluster.getNameNode().getFSImage().getEditLog().setRuntimeForTesting(runtime);
}
@After
public void shutDownMiniCluster() throws IOException {
fs.close();
cluster.shutdown();
if (fs != null)
fs.close();
if (cluster != null)
cluster.shutdown();
}
@Test
@ -109,7 +120,7 @@ public void testAllEditsDirFailOnWrite() throws IOException {
assertTrue(doAnEdit());
// The previous edit could not be synced to any persistent storage, should
// have halted the NN.
assertExitInvocations(1);
assertExitInvocations(atLeast(1));
}
@Test
@ -124,6 +135,80 @@ public void testSingleFailedEditsDirOnSetReadyToFlush() throws IOException {
assertExitInvocations(0);
assertFalse(cluster.getNameNode().isInSafeMode());
}
@Test
public void testSingleRequiredFailedEditsDirOnSetReadyToFlush()
throws IOException {
// Set one of the edits dirs to be required.
String[] editsDirs = cluster.getConfiguration(0).getTrimmedStrings(
DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
shutDownMiniCluster();
Configuration conf = new HdfsConfiguration();
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY, editsDirs[1]);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY, 0);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_KEY, 0);
setUpMiniCluster(conf, true);
assertTrue(doAnEdit());
// Invalidated the one required edits journal.
invalidateEditsDirAtIndex(1, false, false);
// Make sure runtime.exit(...) hasn't been called at all yet.
assertExitInvocations(0);
// This will actually return true in the tests, since the NN will not in
// fact call Runtime.exit();
doAnEdit();
// A single failure of a required journal should result in a call to
// runtime.exit(...).
assertExitInvocations(atLeast(1));
}
@Test
public void testMultipleRedundantFailedEditsDirOnSetReadyToFlush()
throws IOException {
// Set up 4 name/edits dirs.
shutDownMiniCluster();
Configuration conf = new HdfsConfiguration();
String[] nameDirs = new String[4];
for (int i = 0; i < nameDirs.length; i++) {
File nameDir = new File(System.getProperty("test.build.data"),
"name-dir" + i);
nameDir.mkdirs();
nameDirs[i] = nameDir.getAbsolutePath();
}
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
StringUtils.join(nameDirs, ","));
// Keep running unless there are less than 2 edits dirs remaining.
conf.setInt(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY, 2);
setUpMiniCluster(conf, false);
// All journals active.
assertTrue(doAnEdit());
assertExitInvocations(0);
// Invalidate 1/4 of the redundant journals.
invalidateEditsDirAtIndex(0, false, false);
assertTrue(doAnEdit());
assertExitInvocations(0);
// Invalidate 2/4 of the redundant journals.
invalidateEditsDirAtIndex(1, false, false);
assertTrue(doAnEdit());
assertExitInvocations(0);
// Invalidate 3/4 of the redundant journals.
invalidateEditsDirAtIndex(2, false, false);
// This will actually return true in the tests, since the NN will not in
// fact call Runtime.exit();
doAnEdit();
// A failure of more than the minimum number of redundant journals should
// result in a call to runtime.exit(...).
assertExitInvocations(atLeast(1));
}
/**
* Replace the journal at index <code>index</code> with one that throws an
@ -181,6 +266,17 @@ private void restoreEditsDirAtIndex(int index, EditLogOutputStream elos) {
private boolean doAnEdit() throws IOException {
return fs.mkdirs(new Path("/tmp", Integer.toString(editsPerformed++)));
}
/**
* Make sure that Runtime.exit(...) has been called exactly
* <code>expectedExits</code> number of times.
*
* @param expectedExits the exact number of times Runtime.exit(...) should
* have been called.
*/
private void assertExitInvocations(int expectedExits) {
assertExitInvocations(times(expectedExits));
}
/**
* Make sure that Runtime.exit(...) has been called
@ -188,7 +284,7 @@ private boolean doAnEdit() throws IOException {
*
* @param expectedExits the number of times Runtime.exit(...) should have been called.
*/
private void assertExitInvocations(int expectedExits) {
verify(runtime, times(expectedExits)).exit(anyInt());
private void assertExitInvocations(VerificationMode expectedExits) {
verify(runtime, expectedExits).exit(anyInt());
}
}

View File

@ -118,6 +118,7 @@ public void testDummyJournalManager() throws Exception {
DummyJournalManager.class.getName());
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
"dummy://test");
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_KEY, 0);
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
cluster.waitActive();

View File

@ -19,21 +19,20 @@
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DF;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.NameNodeResourceMonitor;
import org.apache.hadoop.hdfs.server.namenode.NameNodeResourceChecker.CheckedVolume;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
import com.google.common.collect.Lists;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@ -49,7 +48,7 @@ public void setUp () throws IOException {
baseDir = new File(System.getProperty("test.build.data"));
nameDir = new File(baseDir, "resource-check-name-dir");
nameDir.mkdirs();
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, nameDir.getAbsolutePath());
}
/**
@ -90,7 +89,7 @@ public void testCheckThatNameNodeResourceMonitorIsRunning()
throws IOException, InterruptedException {
MiniDFSCluster cluster = null;
try {
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, nameDir.getAbsolutePath());
conf.setLong(DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 1);
cluster = new MiniDFSCluster.Builder(conf)
@ -145,7 +144,7 @@ public void testChecking2NameDirsOnOneVolume() throws IOException {
File nameDir2 = new File(System.getProperty("test.build.data"), "name-dir2");
nameDir1.mkdirs();
nameDir2.mkdirs();
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
nameDir1.getAbsolutePath() + "," + nameDir2.getAbsolutePath());
conf.setLong(DFSConfigKeys.DFS_NAMENODE_DU_RESERVED_KEY, Long.MAX_VALUE);
@ -164,7 +163,7 @@ public void testCheckingExtraVolumes() throws IOException {
Configuration conf = new Configuration();
File nameDir = new File(System.getProperty("test.build.data"), "name-dir");
nameDir.mkdirs();
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, nameDir.getAbsolutePath());
conf.set(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_KEY, nameDir.getAbsolutePath());
conf.setLong(DFSConfigKeys.DFS_NAMENODE_DU_RESERVED_KEY, Long.MAX_VALUE);
@ -176,38 +175,70 @@ public void testCheckingExtraVolumes() throws IOException {
/**
* Test that the NN is considered to be out of resources only once all
* configured volumes are low on resources.
* redundant configured volumes are low on resources, or when any required
* volume is low on resources.
*/
@Test
public void testLowResourceVolumePolicy() throws IOException {
public void testLowResourceVolumePolicy() throws IOException, URISyntaxException {
Configuration conf = new Configuration();
File nameDir1 = new File(System.getProperty("test.build.data"), "name-dir1");
File nameDir2 = new File(System.getProperty("test.build.data"), "name-dir2");
nameDir1.mkdirs();
nameDir2.mkdirs();
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
nameDir1.getAbsolutePath() + "," + nameDir2.getAbsolutePath());
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_KEY, 2);
NameNodeResourceChecker nnrc = new NameNodeResourceChecker(conf);
// For the purpose of this test, we need to force the name dirs to appear to
// be on different volumes.
Map<String, DF> volumes = new HashMap<String, DF>();
volumes.put("volume1", new DF(nameDir1, conf));
volumes.put("volume2", new DF(nameDir2, conf));
Map<String, CheckedVolume> volumes = new HashMap<String, CheckedVolume>();
CheckedVolume volume1 = Mockito.mock(CheckedVolume.class);
CheckedVolume volume2 = Mockito.mock(CheckedVolume.class);
CheckedVolume volume3 = Mockito.mock(CheckedVolume.class);
CheckedVolume volume4 = Mockito.mock(CheckedVolume.class);
CheckedVolume volume5 = Mockito.mock(CheckedVolume.class);
Mockito.when(volume1.isResourceAvailable()).thenReturn(true);
Mockito.when(volume2.isResourceAvailable()).thenReturn(true);
Mockito.when(volume3.isResourceAvailable()).thenReturn(true);
Mockito.when(volume4.isResourceAvailable()).thenReturn(true);
Mockito.when(volume5.isResourceAvailable()).thenReturn(true);
// Make volumes 4 and 5 required.
Mockito.when(volume4.isRequired()).thenReturn(true);
Mockito.when(volume5.isRequired()).thenReturn(true);
volumes.put("volume1", volume1);
volumes.put("volume2", volume2);
volumes.put("volume3", volume3);
volumes.put("volume4", volume4);
volumes.put("volume5", volume5);
nnrc.setVolumes(volumes);
NameNodeResourceChecker spyNnrc = Mockito.spy(nnrc);
// Initially all dirs have space.
assertTrue(nnrc.hasAvailableDiskSpace());
Mockito.when(spyNnrc.getVolumesLowOnSpace()).thenReturn(
Lists.newArrayList("volume1"));
// 1/3 redundant dir is low on space.
Mockito.when(volume1.isResourceAvailable()).thenReturn(false);
assertTrue(nnrc.hasAvailableDiskSpace());
assertTrue(spyNnrc.hasAvailableDiskSpace());
// 2/3 redundant dirs are low on space.
Mockito.when(volume2.isResourceAvailable()).thenReturn(false);
assertFalse(nnrc.hasAvailableDiskSpace());
Mockito.when(spyNnrc.getVolumesLowOnSpace()).thenReturn(
Lists.newArrayList("volume1", "volume2"));
// Lower the minimum number of redundant volumes that must be available.
nnrc.setMinimumReduntdantVolumes(1);
assertTrue(nnrc.hasAvailableDiskSpace());
assertFalse(spyNnrc.hasAvailableDiskSpace());
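// NOTE: volume3 is a redundant dir, not a required one (only volumes 4 and 5
// are required). With volume1 and volume2 still low, 3/3 redundant dirs are low.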
Mockito.when(volume3.isResourceAvailable()).thenReturn(false);
assertFalse(nnrc.hasAvailableDiskSpace());
// volume3 recovers, so 1/3 redundant dirs is available again; now the
// required dir volume4 is low on space.
Mockito.when(volume3.isResourceAvailable()).thenReturn(true);
Mockito.when(volume4.isResourceAvailable()).thenReturn(false);
assertFalse(nnrc.hasAvailableDiskSpace());
}
}

View File

@ -0,0 +1,107 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.junit.Assert.*;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.util.ArrayList;
import java.util.Collection;
import org.junit.Test;
/**
 * Exercises {@link NameNodeResourcePolicy#areResourcesAvailable} with mocked
 * {@link CheckableNameNodeResource}s, covering redundant-only, required-only
 * and mixed sets of resources.
 */
public class TestNameNodeResourcePolicy {

  /** One redundant resource, minimum of one: available only while it is up. */
  @Test
  public void testSingleRedundantResource() {
    assertTrue(scenarioHasResources(1, 0, 0, 0, 1));
    assertFalse(scenarioHasResources(1, 0, 1, 0, 1));
  }

  /** One required resource: available only while it is up. */
  @Test
  public void testSingleRequiredResource() {
    assertTrue(scenarioHasResources(0, 1, 0, 0, 0));
    assertFalse(scenarioHasResources(0, 1, 0, 1, 0));
  }

  /**
   * Four redundant resources with the minimum swept from four down to one,
   * plus the case where the minimum exceeds the number of redundant
   * resources, which is expected to raise a RuntimeException.
   */
  @Test
  public void testMultipleRedundantResources() {
    assertTrue(scenarioHasResources(4, 0, 0, 0, 4));
    assertFalse(scenarioHasResources(4, 0, 1, 0, 4));
    assertTrue(scenarioHasResources(4, 0, 1, 0, 3));
    assertFalse(scenarioHasResources(4, 0, 2, 0, 3));
    assertTrue(scenarioHasResources(4, 0, 2, 0, 2));
    assertFalse(scenarioHasResources(4, 0, 3, 0, 2));
    assertTrue(scenarioHasResources(4, 0, 3, 0, 1));
    assertFalse(scenarioHasResources(4, 0, 4, 0, 1));

    try {
      scenarioHasResources(1, 0, 0, 0, 2);
      fail("Should fail if there are more minimum redundant resources than " +
          "total redundant resources");
    } catch (RuntimeException expected) {
      final String message = expected.getMessage();
      assertTrue(message.startsWith("Need a minimum"));
    }
  }

  /** Three required resources: any failed one makes resources unavailable. */
  @Test
  public void testMultipleRequiredResources() {
    assertTrue(scenarioHasResources(0, 3, 0, 0, 0));
    assertFalse(scenarioHasResources(0, 3, 0, 1, 0));
    assertFalse(scenarioHasResources(0, 3, 0, 2, 0));
    assertFalse(scenarioHasResources(0, 3, 0, 3, 0));
  }

  /** Two redundant plus two required resources, minimum of one redundant. */
  @Test
  public void testRedundantWithRequiredResources() {
    assertTrue(scenarioHasResources(2, 2, 0, 0, 1));
    assertTrue(scenarioHasResources(2, 2, 1, 0, 1));
    assertFalse(scenarioHasResources(2, 2, 2, 0, 1));
    assertFalse(scenarioHasResources(2, 2, 0, 1, 1));
    assertFalse(scenarioHasResources(2, 2, 1, 1, 1));
    assertFalse(scenarioHasResources(2, 2, 2, 1, 1));
  }

  /**
   * Build the described set of mocked resources (redundant ones first, then
   * required ones) and ask the policy whether resources are available.
   *
   * @param redundantCount number of redundant resources to create.
   * @param requiredCount number of required resources to create.
   * @param failedRedundantCount how many redundant resources report
   *        themselves as unavailable.
   * @param failedRequiredCount how many required resources report
   *        themselves as unavailable.
   * @param minimumRedundant minimum handed to the policy.
   * @return the policy's verdict for this scenario.
   */
  private static boolean scenarioHasResources(
      int redundantCount,
      int requiredCount,
      int failedRedundantCount,
      int failedRequiredCount,
      int minimumRedundant) {
    Collection<CheckableNameNodeResource> allResources =
        new ArrayList<CheckableNameNodeResource>();
    addMockResources(allResources, redundantCount, failedRedundantCount, false);
    addMockResources(allResources, requiredCount, failedRequiredCount, true);
    return NameNodeResourcePolicy.areResourcesAvailable(allResources,
        minimumRedundant);
  }

  /**
   * Append <code>total</code> mocked resources to <code>sink</code>; the first
   * <code>failed</code> of them report that they are not available.
   */
  private static void addMockResources(
      Collection<CheckableNameNodeResource> sink,
      int total,
      int failed,
      boolean required) {
    for (int idx = 0; idx < total; idx++) {
      CheckableNameNodeResource resource = mock(CheckableNameNodeResource.class);
      boolean available = idx >= failed;
      when(resource.isRequired()).thenReturn(required);
      when(resource.isResourceAvailable()).thenReturn(available);
      sink.add(resource);
    }
  }
}