HDFS-5291. Standby namenode after transition to active goes into safemode. Contributed by Jing Zhao.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1530112 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jing Zhao 2013-10-07 23:58:44 +00:00
parent 4e46d2066e
commit 1fe1942328
6 changed files with 199 additions and 142 deletions

View File

@ -34,6 +34,7 @@
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.RetriableException;
import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.net.ConnectTimeoutException;
@ -531,6 +532,15 @@ public FailoverOnNetworkExceptionRetry(RetryPolicy fallbackPolicy,
this.maxDelayBase = maxDelayBase; this.maxDelayBase = maxDelayBase;
} }
/**
 * Computes how long to wait before the next failover or retry attempt.
 *
 * @param times number of failovers or retries already performed
 * @return 0 if this is our first failover/retry (i.e., retry immediately);
 *         otherwise the delay produced by {@code calculateExponentialTime}
 *         for {@code delayMillis}, {@code times} and {@code maxDelayBase}
 */
private long getFailoverOrRetrySleepTime(int times) {
  if (times == 0) {
    // first attempt: no back-off
    return 0;
  }
  return calculateExponentialTime(delayMillis, times, maxDelayBase);
}
@Override @Override
public RetryAction shouldRetry(Exception e, int retries, public RetryAction shouldRetry(Exception e, int retries,
int failovers, boolean isIdempotentOrAtMostOnce) throws Exception { int failovers, boolean isIdempotentOrAtMostOnce) throws Exception {
@ -546,11 +556,8 @@ public RetryAction shouldRetry(Exception e, int retries,
e instanceof StandbyException || e instanceof StandbyException ||
e instanceof ConnectTimeoutException || e instanceof ConnectTimeoutException ||
isWrappedStandbyException(e)) { isWrappedStandbyException(e)) {
return new RetryAction( return new RetryAction(RetryAction.RetryDecision.FAILOVER_AND_RETRY,
RetryAction.RetryDecision.FAILOVER_AND_RETRY, getFailoverOrRetrySleepTime(failovers));
// retry immediately if this is our first failover, sleep otherwise
failovers == 0 ? 0 :
calculateExponentialTime(delayMillis, failovers, maxDelayBase));
} else if (e instanceof SocketException || } else if (e instanceof SocketException ||
(e instanceof IOException && !(e instanceof RemoteException))) { (e instanceof IOException && !(e instanceof RemoteException))) {
if (isIdempotentOrAtMostOnce) { if (isIdempotentOrAtMostOnce) {
@ -560,11 +567,17 @@ public RetryAction shouldRetry(Exception e, int retries,
"the invoked method is not idempotent, and unable to determine " + "the invoked method is not idempotent, and unable to determine " +
"whether it was invoked"); "whether it was invoked");
} }
} else {
RetriableException re = getWrappedRetriableException(e);
if (re != null) {
return new RetryAction(RetryAction.RetryDecision.RETRY,
getFailoverOrRetrySleepTime(retries));
} else { } else {
return fallbackPolicy.shouldRetry(e, retries, failovers, return fallbackPolicy.shouldRetry(e, retries, failovers,
isIdempotentOrAtMostOnce); isIdempotentOrAtMostOnce);
} }
} }
}
} }
@ -596,4 +609,14 @@ private static boolean isWrappedStandbyException(Exception e) {
StandbyException.class); StandbyException.class);
return unwrapped instanceof StandbyException; return unwrapped instanceof StandbyException;
} }
/**
 * Extracts the {@link RetriableException} carried by a
 * {@link RemoteException}, if there is one.
 *
 * @param e the exception to inspect
 * @return the unwrapped {@link RetriableException}, or {@code null} when
 *         {@code e} is not a {@link RemoteException} or unwrapping it does
 *         not yield a {@link RetriableException}
 */
private static RetriableException getWrappedRetriableException(Exception e) {
  if (e instanceof RemoteException) {
    Exception cause = ((RemoteException) e).unwrapRemoteException(
        RetriableException.class);
    if (cause instanceof RetriableException) {
      return (RetriableException) cause;
    }
  }
  return null;
}
} }

View File

@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ipc;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Exception thrown by a server typically to indicate that server is in a state
* where request cannot be processed temporarily (such as still starting up).
* Client may retry the request. If the service is up, the server may be able to
* process a retried request.
*/
@InterfaceStability.Evolving
public class RetriableException extends IOException {
  private static final long serialVersionUID = 1915561725516487301L;

  /**
   * Creates a retriable exception described by a message only.
   *
   * @param msg the detail message
   */
  public RetriableException(String msg) {
    super(msg);
  }

  /**
   * Creates a retriable exception that wraps another exception as its cause.
   *
   * @param e the underlying exception, kept as the cause
   */
  public RetriableException(Exception e) {
    super(e);
  }
}

View File

@ -397,6 +397,9 @@ Release 2.2.0 - 2013-10-13
HDFS-5307. Support both HTTP and HTTPS in jsp pages (Haohui Mai via HDFS-5307. Support both HTTP and HTTPS in jsp pages (Haohui Mai via
branconli) branconli)
HDFS-5291. Standby namenode after transition to active goes into safemode.
(jing9)
Release 2.1.1-beta - 2013-09-23 Release 2.1.1-beta - 2013-09-23
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -209,6 +209,7 @@
import org.apache.hadoop.ipc.RetryCache; import org.apache.hadoop.ipc.RetryCache;
import org.apache.hadoop.ipc.RetryCache.CacheEntry; import org.apache.hadoop.ipc.RetryCache.CacheEntry;
import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload; import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
import org.apache.hadoop.ipc.RetriableException;
import org.apache.hadoop.ipc.Server; import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metric;
@ -1050,6 +1051,26 @@ public void checkOperation(OperationCategory op) throws StandbyException {
} }
} }
/**
 * Rejects the current operation when the NameNode is in SafeMode; does
 * nothing otherwise.
 *
 * @param errorMsg text used to build the {@link SafeModeException}
 * @throws RetriableException
 *           If 1) the NameNode is in SafeMode, 2) HA is enabled, and 3) the
 *           NameNode is in active state. The {@link SafeModeException} is
 *           wrapped as the cause.
 * @throws SafeModeException
 *           Otherwise, if the NameNode is in SafeMode.
 */
private void checkNameNodeSafeMode(String errorMsg)
    throws RetriableException, SafeModeException {
  if (!isInSafeMode()) {
    return;
  }
  SafeModeException se = new SafeModeException(errorMsg, safeMode);
  boolean activeInHa = haEnabled && haContext != null
      && haContext.getState().getServiceState() == HAServiceState.ACTIVE;
  if (activeInHa) {
    throw new RetriableException(se);
  }
  throw se;
}
public static Collection<URI> getNamespaceDirs(Configuration conf) { public static Collection<URI> getNamespaceDirs(Configuration conf) {
return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
} }
@ -1351,9 +1372,7 @@ private void setPermissionInt(String src, FsPermission permission)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot set permission for " + src);
throw new SafeModeException("Cannot set permission for " + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
checkOwner(pc, src); checkOwner(pc, src);
dir.setPermission(src, permission); dir.setPermission(src, permission);
@ -1390,9 +1409,7 @@ private void setOwnerInt(String src, String username, String group)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot set owner for " + src);
throw new SafeModeException("Cannot set owner for " + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
checkOwner(pc, src); checkOwner(pc, src);
if (!pc.isSuperUser()) { if (!pc.isSuperUser()) {
@ -1472,8 +1489,14 @@ private LocatedBlocks getBlockLocationsInt(String src, long offset,
for (LocatedBlock b : ret.getLocatedBlocks()) { for (LocatedBlock b : ret.getLocatedBlocks()) {
// if safemode & no block locations yet then throw safemodeException // if safemode & no block locations yet then throw safemodeException
if ((b.getLocations() == null) || (b.getLocations().length == 0)) { if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
throw new SafeModeException("Zero blocklocations for " + src, SafeModeException se = new SafeModeException(
safeMode); "Zero blocklocations for " + src, safeMode);
if (haEnabled && haContext != null &&
haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
throw new RetriableException(se);
} else {
throw se;
}
} }
} }
} }
@ -1614,9 +1637,7 @@ private void concatInt(String target, String [] srcs,
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot concat " + target);
throw new SafeModeException("Cannot concat " + target, safeMode);
}
concatInternal(pc, target, srcs, logRetryCache); concatInternal(pc, target, srcs, logRetryCache);
resultingStat = getAuditFileInfo(target, false); resultingStat = getAuditFileInfo(target, false);
} finally { } finally {
@ -1764,9 +1785,7 @@ private void setTimesInt(String src, long mtime, long atime)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot set times " + src);
throw new SafeModeException("Cannot set times " + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
// Write access is required to set access and modification times // Write access is required to set access and modification times
@ -1829,9 +1848,7 @@ private void createSymlinkInt(String target, String link,
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot create symlink " + link);
throw new SafeModeException("Cannot create symlink " + link, safeMode);
}
link = FSDirectory.resolvePath(link, pathComponents, dir); link = FSDirectory.resolvePath(link, pathComponents, dir);
if (!createParent) { if (!createParent) {
verifyParentDir(link); verifyParentDir(link);
@ -1889,9 +1906,7 @@ private boolean setReplicationInt(String src, final short replication)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot set replication for " + src);
throw new SafeModeException("Cannot set replication for " + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
if (isPermissionEnabled) { if (isPermissionEnabled) {
checkPathAccess(pc, src, FsAction.WRITE); checkPathAccess(pc, src, FsAction.WRITE);
@ -2021,9 +2036,7 @@ private HdfsFileStatus startFileInt(String src, PermissionStatus permissions,
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot create file" + src);
throw new SafeModeException("Cannot create file" + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
startFileInternal(pc, src, permissions, holder, clientMachine, create, startFileInternal(pc, src, permissions, holder, clientMachine, create,
overwrite, createParent, replication, blockSize, logRetryCache); overwrite, createParent, replication, blockSize, logRetryCache);
@ -2242,10 +2255,7 @@ boolean recoverLease(String src, String holder, String clientMachine)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot recover the lease of " + src);
throw new SafeModeException(
"Cannot recover the lease of " + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src); final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
if (!inode.isUnderConstruction()) { if (!inode.isUnderConstruction()) {
@ -2396,9 +2406,7 @@ private LocatedBlock appendFileInt(String src, String holder,
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot append to file" + src);
throw new SafeModeException("Cannot append to file" + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache); lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache);
} catch (StandbyException se) { } catch (StandbyException se) {
@ -2548,9 +2556,7 @@ INodesInPath analyzeFileState(String src,
checkBlock(previous); checkBlock(previous);
onRetryBlock[0] = null; onRetryBlock[0] = null;
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot add block to " + src);
throw new SafeModeException("Cannot add block to " + src, safeMode);
}
// have we exceeded the configured limit of fs objects. // have we exceeded the configured limit of fs objects.
checkFsObjectLimit(); checkFsObjectLimit();
@ -2659,10 +2665,7 @@ LocatedBlock getAdditionalDatanode(String src, final ExtendedBlock blk,
try { try {
checkOperation(OperationCategory.READ); checkOperation(OperationCategory.READ);
//check safe mode //check safe mode
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
throw new SafeModeException("Cannot add datanode; src=" + src
+ ", blk=" + blk, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
//check lease //check lease
@ -2707,10 +2710,7 @@ boolean abandonBlock(ExtendedBlock b, String src, String holder)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot abandon block " + b + " for fle" + src);
throw new SafeModeException("Cannot abandon block " + b +
" for fle" + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
// //
@ -2793,9 +2793,7 @@ boolean completeFile(String src, String holder,
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot complete file " + src);
throw new SafeModeException("Cannot complete file " + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
success = completeFileInternal(src, holder, success = completeFileInternal(src, holder,
ExtendedBlock.getLocalBlock(last), fileId); ExtendedBlock.getLocalBlock(last), fileId);
@ -2971,9 +2969,7 @@ private boolean renameToInt(String src, String dst, boolean logRetryCache)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot rename " + src);
throw new SafeModeException("Cannot rename " + src, safeMode);
}
src = FSDirectory.resolvePath(src, srcComponents, dir); src = FSDirectory.resolvePath(src, srcComponents, dir);
dst = FSDirectory.resolvePath(dst, dstComponents, dir); dst = FSDirectory.resolvePath(dst, dstComponents, dir);
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
@ -3044,9 +3040,7 @@ void renameTo(String src, String dst, Options.Rename... options)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot rename " + src);
throw new SafeModeException("Cannot rename " + src, safeMode);
}
src = FSDirectory.resolvePath(src, srcComponents, dir); src = FSDirectory.resolvePath(src, srcComponents, dir);
dst = FSDirectory.resolvePath(dst, dstComponents, dir); dst = FSDirectory.resolvePath(dst, dstComponents, dir);
renameToInternal(pc, src, dst, cacheEntry != null, options); renameToInternal(pc, src, dst, cacheEntry != null, options);
@ -3152,9 +3146,7 @@ private boolean deleteInternal(String src, boolean recursive,
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot delete " + src);
throw new SafeModeException("Cannot delete " + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
if (!recursive && dir.isNonEmptyDirectory(src)) { if (!recursive && dir.isNonEmptyDirectory(src)) {
throw new IOException(src + " is non empty"); throw new IOException(src + " is non empty");
@ -3373,9 +3365,7 @@ private boolean mkdirsInt(String src, PermissionStatus permissions,
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot create directory " + src);
throw new SafeModeException("Cannot create directory " + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
status = mkdirsInternal(pc, src, permissions, createParent); status = mkdirsInternal(pc, src, permissions, createParent);
if (status) { if (status) {
@ -3475,9 +3465,7 @@ void setQuota(String path, long nsQuota, long dsQuota)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot set quota on " + path);
throw new SafeModeException("Cannot set quota on " + path, safeMode);
}
dir.setQuota(path, nsQuota, dsQuota); dir.setQuota(path, nsQuota, dsQuota);
} finally { } finally {
writeUnlock(); writeUnlock();
@ -3500,9 +3488,7 @@ void fsync(String src, String clientName, long lastBlockLength)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot fsync file " + src);
throw new SafeModeException("Cannot fsync file " + src, safeMode);
}
src = FSDirectory.resolvePath(src, pathComponents, dir); src = FSDirectory.resolvePath(src, pathComponents, dir);
INodeFileUnderConstruction pendingFile = checkLease(src, clientName); INodeFileUnderConstruction pendingFile = checkLease(src, clientName);
if (lastBlockLength > 0) { if (lastBlockLength > 0) {
@ -3727,11 +3713,8 @@ void commitBlockSynchronization(ExtendedBlock lastblock,
// If a DN tries to commit to the standby, the recovery will // If a DN tries to commit to the standby, the recovery will
// fail, and the next retry will succeed on the new NN. // fail, and the next retry will succeed on the new NN.
if (isInSafeMode()) { checkNameNodeSafeMode(
throw new SafeModeException( "Cannot commitBlockSynchronization while in safe mode");
"Cannot commitBlockSynchronization while in safe mode",
safeMode);
}
final BlockInfo storedBlock = getStoredBlock( final BlockInfo storedBlock = getStoredBlock(
ExtendedBlock.getLocalBlock(lastblock)); ExtendedBlock.getLocalBlock(lastblock));
if (storedBlock == null) { if (storedBlock == null) {
@ -3877,9 +3860,7 @@ void renewLease(String holder) throws IOException {
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot renew lease for " + holder);
throw new SafeModeException("Cannot renew lease for " + holder, safeMode);
}
leaseManager.renewLease(holder); leaseManager.renewLease(holder);
} finally { } finally {
writeUnlock(); writeUnlock();
@ -4262,8 +4243,8 @@ void saveNamespace() throws AccessControlException, IOException {
try { try {
checkOperation(OperationCategory.UNCHECKED); checkOperation(OperationCategory.UNCHECKED);
if (!isInSafeMode()) { if (!isInSafeMode()) {
throw new IOException("Safe mode should be turned ON " + throw new IOException("Safe mode should be turned ON "
"in order to create namespace image."); + "in order to create namespace image.");
} }
getFSImage().saveNamespace(this); getFSImage().saveNamespace(this);
success = true; success = true;
@ -4340,7 +4321,7 @@ void setBalancerBandwidth(long bandwidth) throws IOException {
* replicas, and calculates the ratio of safe blocks to the total number * replicas, and calculates the ratio of safe blocks to the total number
* of blocks in the system, which is the size of blocks in * of blocks in the system, which is the size of blocks in
* {@link FSNamesystem#blockManager}. When the ratio reaches the * {@link FSNamesystem#blockManager}. When the ratio reaches the
* {@link #threshold} it starts the {@link SafeModeMonitor} daemon in order * {@link #threshold} it starts the SafeModeMonitor daemon in order
* to monitor whether the safe mode {@link #extension} is passed. * to monitor whether the safe mode {@link #extension} is passed.
* Then it leaves safe mode and destroys itself. * Then it leaves safe mode and destroys itself.
* <p> * <p>
@ -4348,10 +4329,9 @@ void setBalancerBandwidth(long bandwidth) throws IOException {
* not tracked because the name node is not intended to leave safe mode * not tracked because the name node is not intended to leave safe mode
* automatically in the case. * automatically in the case.
* *
* @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction) * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
* @see SafeModeMonitor
*/ */
class SafeModeInfo { public class SafeModeInfo {
// configuration fields // configuration fields
/** Safe mode threshold condition %.*/ /** Safe mode threshold condition %.*/
private double threshold; private double threshold;
@ -5093,9 +5073,7 @@ CheckpointSignature rollEditLog() throws IOException {
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.JOURNAL); checkOperation(OperationCategory.JOURNAL);
if (isInSafeMode()) { checkNameNodeSafeMode("Log not rolled");
throw new SafeModeException("Log not rolled", safeMode);
}
LOG.info("Roll Edit Log from " + Server.getRemoteAddress()); LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
return getFSImage().rollEditLog(); return getFSImage().rollEditLog();
} finally { } finally {
@ -5116,9 +5094,7 @@ NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
try { try {
checkOperation(OperationCategory.CHECKPOINT); checkOperation(OperationCategory.CHECKPOINT);
if (isInSafeMode()) { checkNameNodeSafeMode("Checkpoint not started");
throw new SafeModeException("Checkpoint not started", safeMode);
}
LOG.info("Start checkpoint for " + backupNode.getAddress()); LOG.info("Start checkpoint for " + backupNode.getAddress());
cmd = getFSImage().startCheckpoint(backupNode, activeNamenode); cmd = getFSImage().startCheckpoint(backupNode, activeNamenode);
getEditLog().logSync(); getEditLog().logSync();
@ -5152,9 +5128,7 @@ void endCheckpoint(NamenodeRegistration registration,
try { try {
checkOperation(OperationCategory.CHECKPOINT); checkOperation(OperationCategory.CHECKPOINT);
if (isInSafeMode()) { checkNameNodeSafeMode("Checkpoint not ended");
throw new SafeModeException("Checkpoint not ended", safeMode);
}
LOG.info("End checkpoint for " + registration.getAddress()); LOG.info("End checkpoint for " + registration.getAddress());
getFSImage().endCheckpoint(sig); getFSImage().endCheckpoint(sig);
success = true; success = true;
@ -5506,10 +5480,7 @@ long getLastAllocatedBlockId() {
long nextGenerationStamp(boolean legacyBlock) long nextGenerationStamp(boolean legacyBlock)
throws IOException, SafeModeException { throws IOException, SafeModeException {
assert hasWriteLock(); assert hasWriteLock();
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot get next generation stamp");
throw new SafeModeException(
"Cannot get next generation stamp", safeMode);
}
long gs; long gs;
if (legacyBlock) { if (legacyBlock) {
@ -5562,12 +5533,9 @@ boolean isLegacyBlock(Block block) {
/** /**
* Increments, logs and then returns the block ID * Increments, logs and then returns the block ID
*/ */
private long nextBlockId() throws SafeModeException { private long nextBlockId() throws IOException {
assert hasWriteLock(); assert hasWriteLock();
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot get next block ID");
throw new SafeModeException(
"Cannot get next block ID", safeMode);
}
final long blockId = blockIdGenerator.nextValue(); final long blockId = blockIdGenerator.nextValue();
getEditLog().logAllocateBlockId(blockId); getEditLog().logAllocateBlockId(blockId);
// NB: callers sync the log // NB: callers sync the log
@ -5577,10 +5545,8 @@ private long nextBlockId() throws SafeModeException {
private INodeFileUnderConstruction checkUCBlock(ExtendedBlock block, private INodeFileUnderConstruction checkUCBlock(ExtendedBlock block,
String clientName) throws IOException { String clientName) throws IOException {
assert hasWriteLock(); assert hasWriteLock();
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot get a new generation stamp and an "
throw new SafeModeException("Cannot get a new generation stamp and an " + + "access token for block " + block);
"access token for block " + block, safeMode);
}
// check stored block state // check stored block state
BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block)); BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
@ -5693,9 +5659,7 @@ void updatePipeline(String clientName, ExtendedBlock oldBlock,
boolean success = false; boolean success = false;
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Pipeline not updated");
throw new SafeModeException("Pipeline not updated", safeMode);
}
assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and " assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
+ oldBlock + " has different block identifier"; + oldBlock + " has different block identifier";
updatePipelineInternal(clientName, oldBlock, newBlock, newNodes, updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
@ -5955,9 +5919,7 @@ Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot issue delegation token");
throw new SafeModeException("Cannot issue delegation token", safeMode);
}
if (!isAllowedDelegationTokenOp()) { if (!isAllowedDelegationTokenOp()) {
throw new IOException( throw new IOException(
"Delegation Token can be issued only with kerberos or web authentication"); "Delegation Token can be issued only with kerberos or web authentication");
@ -6002,9 +5964,7 @@ long renewDelegationToken(Token<DelegationTokenIdentifier> token)
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot renew delegation token");
throw new SafeModeException("Cannot renew delegation token", safeMode);
}
if (!isAllowedDelegationTokenOp()) { if (!isAllowedDelegationTokenOp()) {
throw new IOException( throw new IOException(
"Delegation Token can be renewed only with kerberos or web authentication"); "Delegation Token can be renewed only with kerberos or web authentication");
@ -6035,9 +5995,7 @@ void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot cancel delegation token");
throw new SafeModeException("Cannot cancel delegation token", safeMode);
}
String canceller = getRemoteUser().getUserName(); String canceller = getRemoteUser().getUserName();
DelegationTokenIdentifier id = dtSecretManager DelegationTokenIdentifier id = dtSecretManager
.cancelToken(token, canceller); .cancelToken(token, canceller);
@ -6558,10 +6516,7 @@ void allowSnapshot(String path) throws SafeModeException, IOException {
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot allow snapshot for " + path);
throw new SafeModeException("Cannot allow snapshot for " + path,
safeMode);
}
checkSuperuserPrivilege(); checkSuperuserPrivilege();
dir.writeLock(); dir.writeLock();
@ -6586,10 +6541,7 @@ void disallowSnapshot(String path) throws SafeModeException, IOException {
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot disallow snapshot for " + path);
throw new SafeModeException("Cannot disallow snapshot for " + path,
safeMode);
}
checkSuperuserPrivilege(); checkSuperuserPrivilege();
dir.writeLock(); dir.writeLock();
@ -6627,10 +6579,7 @@ String createSnapshot(String snapshotRoot, String snapshotName)
String snapshotPath = null; String snapshotPath = null;
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
throw new SafeModeException("Cannot create snapshot for "
+ snapshotRoot, safeMode);
}
if (isPermissionEnabled) { if (isPermissionEnabled) {
checkOwner(pc, snapshotRoot); checkOwner(pc, snapshotRoot);
} }
@ -6679,10 +6628,7 @@ void renameSnapshot(String path, String snapshotOldName,
boolean success = false; boolean success = false;
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot rename snapshot for " + path);
throw new SafeModeException("Cannot rename snapshot for " + path,
safeMode);
}
if (isPermissionEnabled) { if (isPermissionEnabled) {
checkOwner(pc, path); checkOwner(pc, path);
} }
@ -6797,10 +6743,7 @@ void deleteSnapshot(String snapshotRoot, String snapshotName)
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) { checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);
throw new SafeModeException(
"Cannot delete snapshot for " + snapshotRoot, safeMode);
}
if (isPermissionEnabled) { if (isPermissionEnabled) {
checkOwner(pc, snapshotRoot); checkOwner(pc, snapshotRoot);
} }

View File

@ -33,10 +33,7 @@
public class SafeModeException extends IOException { public class SafeModeException extends IOException {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
public SafeModeException() {}
public SafeModeException(String text, FSNamesystem.SafeModeInfo mode ) { public SafeModeException(String text, FSNamesystem.SafeModeInfo mode ) {
super(text + ". Name node is in safe mode.\n" + mode.getTurnOffTip()); super(text + ". Name node is in safe mode.\n" + mode.getTurnOffTip());
} }
} }

View File

@ -17,12 +17,18 @@
*/ */
package org.apache.hadoop.hdfs.server.namenode.ha; package org.apache.hadoop.hdfs.server.namenode.ha;
import static org.junit.Assert.*; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException; import java.io.IOException;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.net.URI; import java.net.URI;
import java.util.Collections;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -101,6 +107,50 @@ public void shutdownCluster() {
} }
} }
/**
 * Make sure the client retries when the active NN is in safemode.
 * <p>
 * A background thread calls {@code mkdirs} while nn0 is in safemode. The
 * main thread then takes nn0 out of safemode and waits for the background
 * thread to publish its result, which must be {@code true}.
 */
@Test (timeout=300000)
public void testClientRetrySafeMode() throws Exception {
  final Map<Path, Boolean> results = Collections
      .synchronizedMap(new HashMap<Path, Boolean>());
  final Path test = new Path("/test");
  // let nn0 enter safemode
  NameNodeAdapter.enterSafeMode(nn0, false);
  LOG.info("enter safemode");
  new Thread() {
    @Override
    public void run() {
      boolean mkdir = false;
      try {
        mkdir = fs.mkdirs(test);
        LOG.info("mkdir finished, result is " + mkdir);
      } catch (Exception e) {
        LOG.info("Got Exception while calling mkdir", e);
      } finally {
        // Always publish an outcome and wake the waiter. If mkdirs throws,
        // the main thread would otherwise block in wait() until the JUnit
        // timeout instead of failing right away.
        synchronized (TestHASafeMode.this) {
          results.put(test, mkdir);
          TestHASafeMode.this.notifyAll();
        }
      }
    }
  }.start();

  // make sure the client's call has actually been handled by the active NN
  assertFalse("The directory should not be created while NN in safemode",
      fs.exists(test));

  Thread.sleep(1000);
  // let nn0 leave safemode
  NameNodeAdapter.leaveSafeMode(nn0);
  LOG.info("leave safemode");

  synchronized (this) {
    // loop guards against spurious wakeups
    while (!results.containsKey(test)) {
      this.wait();
    }
    assertTrue("mkdirs should succeed after the NN leaves safemode",
        results.get(test));
  }
}
private void restartStandby() throws IOException { private void restartStandby() throws IOException {
cluster.shutdownNameNode(1); cluster.shutdownNameNode(1);
// Set the safemode extension to be lengthy, so that the tests // Set the safemode extension to be lengthy, so that the tests