HDFS-6970. Move startFile EDEK retries to the DFSClient. (wang)

This commit is contained in:
Andrew Wang 2014-09-18 17:35:24 -07:00
parent fe38d2e9b5
commit 20a076bafc
5 changed files with 122 additions and 91 deletions

View File

@ -562,6 +562,8 @@ Release 2.6.0 - UNRELEASED
HDFS-6727. Refresh data volumes on DataNode based on configuration changes HDFS-6727. Refresh data volumes on DataNode based on configuration changes
(Lei Xu via cmccabe) (Lei Xu via cmccabe)
HDFS-6970. Move startFile EDEK retries to the DFSClient. (wang)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang) HDFS-6690. Deduplicate xattr names in memory. (wang)

View File

@ -41,6 +41,7 @@
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.crypto.CipherSuite; import org.apache.hadoop.crypto.CipherSuite;
import org.apache.hadoop.fs.CanSetDropBehind; import org.apache.hadoop.fs.CanSetDropBehind;
@ -76,6 +77,7 @@
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException; import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException;
import org.apache.hadoop.hdfs.server.namenode.RetryStartFileException;
import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.EnumSetWritable;
import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IOUtils;
@ -126,6 +128,13 @@
public class DFSOutputStream extends FSOutputSummer public class DFSOutputStream extends FSOutputSummer
implements Syncable, CanSetDropBehind { implements Syncable, CanSetDropBehind {
private static final int MAX_PACKETS = 80; // each packet 64K, total 5MB private static final int MAX_PACKETS = 80; // each packet 64K, total 5MB
/**
* Number of times to retry creating a file when there are transient
* errors (typically related to encryption zones and KeyProvider operations).
*/
@VisibleForTesting
public static final int CREATE_RETRY_COUNT = 10;
private final DFSClient dfsClient; private final DFSClient dfsClient;
private final long dfsclientSlowLogThresholdMs; private final long dfsclientSlowLogThresholdMs;
private Socket s; private Socket s;
@ -1648,23 +1657,46 @@ static DFSOutputStream newStreamForCreate(DFSClient dfsClient, String src,
short replication, long blockSize, Progressable progress, int buffersize, short replication, long blockSize, Progressable progress, int buffersize,
DataChecksum checksum, String[] favoredNodes, DataChecksum checksum, String[] favoredNodes,
List<CipherSuite> cipherSuites) throws IOException { List<CipherSuite> cipherSuites) throws IOException {
final HdfsFileStatus stat; HdfsFileStatus stat = null;
// Retry the create if we get a RetryStartFileException up to a maximum
// number of times
boolean shouldRetry = true;
int retryCount = CREATE_RETRY_COUNT;
while (shouldRetry) {
shouldRetry = false;
try { try {
stat = dfsClient.namenode.create(src, masked, dfsClient.clientName, stat = dfsClient.namenode.create(src, masked, dfsClient.clientName,
new EnumSetWritable<CreateFlag>(flag), createParent, replication, new EnumSetWritable<CreateFlag>(flag), createParent, replication,
blockSize, cipherSuites); blockSize, cipherSuites);
} catch(RemoteException re) { break;
throw re.unwrapRemoteException(AccessControlException.class, } catch (RemoteException re) {
IOException e = re.unwrapRemoteException(
AccessControlException.class,
DSQuotaExceededException.class, DSQuotaExceededException.class,
FileAlreadyExistsException.class, FileAlreadyExistsException.class,
FileNotFoundException.class, FileNotFoundException.class,
ParentNotDirectoryException.class, ParentNotDirectoryException.class,
NSQuotaExceededException.class, NSQuotaExceededException.class,
RetryStartFileException.class,
SafeModeException.class, SafeModeException.class,
UnresolvedPathException.class, UnresolvedPathException.class,
SnapshotAccessControlException.class, SnapshotAccessControlException.class,
UnknownCipherSuiteException.class); UnknownCipherSuiteException.class);
if (e instanceof RetryStartFileException) {
if (retryCount > 0) {
shouldRetry = true;
retryCount--;
} else {
throw new IOException("Too many retries because of encryption" +
" zone operations", e);
} }
} else {
throw e;
}
}
}
Preconditions.checkNotNull(stat, "HdfsFileStatus should not be null!");
final DFSOutputStream out = new DFSOutputStream(dfsClient, src, stat, final DFSOutputStream out = new DFSOutputStream(dfsClient, src, stat,
flag, progress, checksum, favoredNodes); flag, progress, checksum, favoredNodes);
out.start(); out.start();

View File

@ -2490,31 +2490,18 @@ private HdfsFileStatus startFileInt(final String srcArg,
waitForLoadingFSImage(); waitForLoadingFSImage();
/* /**
* We want to avoid holding any locks while doing KeyProvider operations, * If the file is in an encryption zone, we optimistically create an
* since they can be very slow. Since the path can * EDEK for the file by calling out to the configured KeyProvider.
* flip flop between being in an encryption zone and not in the meantime, * Since this typically involves doing an RPC, we take the readLock
* we need to recheck the preconditions and redo KeyProvider operations * initially, then drop it to do the RPC.
* in some situations.
* *
* A special RetryStartFileException is used to indicate that we should * Since the path can flip-flop between being in an encryption zone and not
* retry creation of a FileEncryptionInfo. * in the meantime, we need to recheck the preconditions when we retake the
* lock to do the create. If the preconditions are not met, we throw a
* special RetryStartFileException to ask the DFSClient to try the create
* again later.
*/ */
BlocksMapUpdateInfo toRemoveBlocks = null;
try {
boolean shouldContinue = true;
int iters = 0;
while (shouldContinue) {
skipSync = false;
if (iters >= 10) {
throw new IOException("Too many retries because of encryption zone " +
"operations, something might be broken!");
}
shouldContinue = false;
iters++;
// Optimistically determine CipherSuite and ezKeyName if the path is
// currently within an encryption zone
CipherSuite suite = null; CipherSuite suite = null;
String ezKeyName = null; String ezKeyName = null;
readLock(); readLock();
@ -2539,11 +2526,15 @@ private HdfsFileStatus startFileInt(final String srcArg,
(suite == null && ezKeyName == null) || (suite == null && ezKeyName == null) ||
(suite != null && ezKeyName != null), (suite != null && ezKeyName != null),
"Both suite and ezKeyName should both be null or not null"); "Both suite and ezKeyName should both be null or not null");
// Generate EDEK if necessary while not holding the lock // Generate EDEK if necessary while not holding the lock
EncryptedKeyVersion edek = EncryptedKeyVersion edek =
generateEncryptedDataEncryptionKey(ezKeyName); generateEncryptedDataEncryptionKey(ezKeyName);
EncryptionFaultInjector.getInstance().startFileAfterGenerateKey(); EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
// Try to create the file with the computed cipher suite and EDEK
// Proceed with the create, using the computed cipher suite and
// generated EDEK
BlocksMapUpdateInfo toRemoveBlocks = null;
writeLock(); writeLock();
try { try {
checkOperation(OperationCategory.WRITE); checkOperation(OperationCategory.WRITE);
@ -2557,17 +2548,8 @@ private HdfsFileStatus startFileInt(final String srcArg,
} catch (StandbyException se) { } catch (StandbyException se) {
skipSync = true; skipSync = true;
throw se; throw se;
} catch (RetryStartFileException e) {
shouldContinue = true;
if (LOG.isTraceEnabled()) {
LOG.trace("Preconditions failed, retrying creation of " +
"FileEncryptionInfo", e);
}
} finally { } finally {
writeUnlock(); writeUnlock();
}
}
} finally {
// There might be transactions logged while trying to recover the lease. // There might be transactions logged while trying to recover the lease.
// They need to be sync'ed even when an exception was thrown. // They need to be sync'ed even when an exception was thrown.
if (!skipSync) { if (!skipSync) {

View File

@ -17,5 +17,20 @@
*/ */
package org.apache.hadoop.hdfs.server.namenode; package org.apache.hadoop.hdfs.server.namenode;
public class RetryStartFileException extends Exception { import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
@InterfaceAudience.Private
public class RetryStartFileException extends IOException {
private static final long serialVersionUID = 1L;
public RetryStartFileException() {
super("Preconditions for creating a file failed because of a " +
"transient error, retry create later.");
}
public RetryStartFileException(String s) {
super(s);
}
} }

View File

@ -940,7 +940,7 @@ public void doCleanup() throws Exception {
Future<?> future = executor.submit(new CreateFileTask(fsWrapper, file)); Future<?> future = executor.submit(new CreateFileTask(fsWrapper, file));
// Flip-flop between two EZs to repeatedly fail // Flip-flop between two EZs to repeatedly fail
for (int i=0; i<10; i++) { for (int i=0; i<DFSOutputStream.CREATE_RETRY_COUNT+1; i++) {
injector.ready.await(); injector.ready.await();
fsWrapper.delete(zone1, true); fsWrapper.delete(zone1, true);
fsWrapper.mkdir(zone1, FsPermission.getDirDefault(), true); fsWrapper.mkdir(zone1, FsPermission.getDirDefault(), true);