HDFS-6109 let sync_file_range() system call run in background (Liang Xie via stack)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1599347 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0aad2d56df
commit
f15ff5e4f4
@ -464,6 +464,9 @@ Release 2.5.0 - UNRELEASED
|
||||
HDFS-6110 adding more slow action log in critical write path
|
||||
(Liang Xie via stack)
|
||||
|
||||
HDFS-6109 let sync_file_range() system call run in background
|
||||
(Liang Xie via stack)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
|
||||
|
@ -111,6 +111,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
||||
public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_WRITES_DEFAULT = false;
|
||||
public static final String DFS_DATANODE_SYNC_BEHIND_WRITES_KEY = "dfs.datanode.sync.behind.writes";
|
||||
public static final boolean DFS_DATANODE_SYNC_BEHIND_WRITES_DEFAULT = false;
|
||||
public static final String DFS_DATANODE_SYNC_BEHIND_WRITES_IN_BACKGROUND_KEY = "dfs.datanode.sync.behind.writes.in.background";
|
||||
public static final boolean DFS_DATANODE_SYNC_BEHIND_WRITES_IN_BACKGROUND_DEFAULT = false;
|
||||
public static final String DFS_DATANODE_DROP_CACHE_BEHIND_READS_KEY = "dfs.datanode.drop.cache.behind.reads";
|
||||
public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false;
|
||||
public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname";
|
||||
|
@ -104,6 +104,7 @@ class BlockReceiver implements Closeable {
|
||||
private boolean dropCacheBehindWrites;
|
||||
private long lastCacheManagementOffset = 0;
|
||||
private boolean syncBehindWrites;
|
||||
private boolean syncBehindWritesInBackground;
|
||||
|
||||
/** The client name. It is empty if a datanode is the client */
|
||||
private final String clientname;
|
||||
@ -207,6 +208,8 @@ class BlockReceiver implements Closeable {
|
||||
datanode.getDnConf().dropCacheBehindWrites :
|
||||
cachingStrategy.getDropBehind();
|
||||
this.syncBehindWrites = datanode.getDnConf().syncBehindWrites;
|
||||
this.syncBehindWritesInBackground = datanode.getDnConf().
|
||||
syncBehindWritesInBackground;
|
||||
|
||||
final boolean isCreate = isDatanode || isTransfer
|
||||
|| stage == BlockConstructionStage.PIPELINE_SETUP_CREATE;
|
||||
@ -668,10 +671,17 @@ class BlockReceiver implements Closeable {
|
||||
// of file
|
||||
//
|
||||
if (syncBehindWrites) {
|
||||
NativeIO.POSIX.syncFileRangeIfPossible(outFd,
|
||||
lastCacheManagementOffset,
|
||||
offsetInBlock - lastCacheManagementOffset,
|
||||
NativeIO.POSIX.SYNC_FILE_RANGE_WRITE);
|
||||
if (syncBehindWritesInBackground) {
|
||||
this.datanode.getFSDataset().submitBackgroundSyncFileRangeRequest(
|
||||
block, outFd, lastCacheManagementOffset,
|
||||
offsetInBlock - lastCacheManagementOffset,
|
||||
NativeIO.POSIX.SYNC_FILE_RANGE_WRITE);
|
||||
} else {
|
||||
NativeIO.POSIX.syncFileRangeIfPossible(outFd,
|
||||
lastCacheManagementOffset, offsetInBlock
|
||||
- lastCacheManagementOffset,
|
||||
NativeIO.POSIX.SYNC_FILE_RANGE_WRITE);
|
||||
}
|
||||
}
|
||||
//
|
||||
// For POSIX_FADV_DONTNEED, we want to drop from the beginning
|
||||
|
@ -67,6 +67,7 @@ public class DNConf {
|
||||
final boolean transferToAllowed;
|
||||
final boolean dropCacheBehindWrites;
|
||||
final boolean syncBehindWrites;
|
||||
final boolean syncBehindWritesInBackground;
|
||||
final boolean dropCacheBehindReads;
|
||||
final boolean syncOnClose;
|
||||
final boolean encryptDataTransfer;
|
||||
@ -119,6 +120,9 @@ public class DNConf {
|
||||
syncBehindWrites = conf.getBoolean(
|
||||
DFSConfigKeys.DFS_DATANODE_SYNC_BEHIND_WRITES_KEY,
|
||||
DFSConfigKeys.DFS_DATANODE_SYNC_BEHIND_WRITES_DEFAULT);
|
||||
syncBehindWritesInBackground = conf.getBoolean(
|
||||
DFSConfigKeys.DFS_DATANODE_SYNC_BEHIND_WRITES_IN_BACKGROUND_KEY,
|
||||
DFSConfigKeys.DFS_DATANODE_SYNC_BEHIND_WRITES_IN_BACKGROUND_DEFAULT);
|
||||
dropCacheBehindReads = conf.getBoolean(
|
||||
DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_READS_KEY,
|
||||
DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT);
|
||||
|
@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset;
|
||||
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileDescriptor;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
@ -431,5 +432,12 @@ public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
|
||||
* @return true when trash is enabled
|
||||
*/
|
||||
public boolean trashEnabled(String bpid);
|
||||
|
||||
/**
|
||||
* Submit a sync_file_range request for the given block's file descriptor to the AsyncDiskService, so that the sync runs in the background.
|
||||
*/
|
||||
public void submitBackgroundSyncFileRangeRequest(final ExtendedBlock block,
|
||||
final FileDescriptor fd, final long offset, final long nbytes,
|
||||
final int flags);
|
||||
}
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileDescriptor;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
@ -31,6 +32,8 @@ import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
|
||||
import org.apache.hadoop.io.nativeio.NativeIO;
|
||||
import org.apache.hadoop.io.nativeio.NativeIOException;
|
||||
|
||||
/**
|
||||
* This class is a container of multiple thread pools, each for a volume,
|
||||
@ -42,6 +45,7 @@ import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
|
||||
* can be slow, and we don't want to use a single thread pool because that
|
||||
* is inefficient when we have more than 1 volume. AsyncDiskService is the
|
||||
* solution for these.
|
||||
* Another example of async disk operation is requesting sync_file_range().
|
||||
*
|
||||
* This class and {@link org.apache.hadoop.util.AsyncDiskService} are similar.
|
||||
* They should be combined.
|
||||
@ -148,6 +152,21 @@ class FsDatasetAsyncDiskService {
|
||||
}
|
||||
}
|
||||
|
||||
public void submitSyncFileRangeRequest(FsVolumeImpl volume,
|
||||
final FileDescriptor fd, final long offset, final long nbytes,
|
||||
final int flags) {
|
||||
execute(volume.getCurrentDir(), new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
NativeIO.POSIX.syncFileRangeIfPossible(fd, offset, nbytes, flags);
|
||||
} catch (NativeIOException e) {
|
||||
LOG.warn("sync_file_range error", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete the block file and meta file from the disk asynchronously, adjust
|
||||
* dfsUsed statistics accordingly.
|
||||
|
@ -1907,5 +1907,13 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
||||
}
|
||||
return new RollingLogsImpl(dir, prefix);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void submitBackgroundSyncFileRangeRequest(ExtendedBlock block,
|
||||
FileDescriptor fd, long offset, long nbytes, int flags) {
|
||||
FsVolumeImpl fsVolumeImpl = this.getVolume(block);
|
||||
asyncDiskService.submitSyncFileRangeRequest(fsVolumeImpl, fd, offset,
|
||||
nbytes, flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
||||
package org.apache.hadoop.hdfs.server.datanode;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileDescriptor;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
@ -1112,5 +1113,11 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
|
||||
public FsVolumeSpi getVolume(ExtendedBlock b) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void submitBackgroundSyncFileRangeRequest(ExtendedBlock block,
|
||||
FileDescriptor fd, long offset, long nbytes, int flags) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user