HDFS-14844. Make buffer of BlockReaderRemote#newBlockReader#BufferedOutputStream configurable. Contributed by Lisheng Sun.

This commit is contained in:
Inigo Goiri 2019-09-20 11:45:01 -07:00
parent b3173e1f58
commit 3f223bebfa
5 changed files with 26 additions and 4 deletions

View File

@ -148,6 +148,9 @@ public interface HdfsClientConfigKeys {
"dfs.client.key.provider.cache.expiry";
long DFS_CLIENT_KEY_PROVIDER_CACHE_EXPIRY_DEFAULT =
TimeUnit.DAYS.toMillis(10); // 10 days
String DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_KEY =
"dfs.client.block.reader.remote.buffer.size";
int DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_DEFAULT = 8192;
String DFS_DATANODE_KERBEROS_PRINCIPAL_KEY =
"dfs.datanode.kerberos.principal";

View File

@ -855,7 +855,7 @@ private BlockReader getRemoteBlockReader(Peer peer) throws IOException {
fileName, block, token, startOffset, length,
verifyChecksum, clientName, peer, datanode,
clientContext.getPeerCache(), cachingStrategy,
networkDistance);
networkDistance, configuration);
}
@Override

View File

@ -29,6 +29,7 @@
import java.util.UUID;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ReadOption;
import org.apache.hadoop.hdfs.BlockReader;
import org.apache.hadoop.hdfs.PeerCache;
@ -55,6 +56,9 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_DEFAULT;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_KEY;
/**
* This is a wrapper around connection to datanode
* and understands checksum, offset etc.
@ -391,10 +395,13 @@ public static BlockReader newBlockReader(String file,
Peer peer, DatanodeID datanodeID,
PeerCache peerCache,
CachingStrategy cachingStrategy,
int networkDistance) throws IOException {
int networkDistance, Configuration configuration) throws IOException {
// in and out will be closed when sock is closed (by the caller)
int bufferSize = configuration.getInt(
DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_KEY,
DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_DEFAULT);
final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
peer.getOutputStream()));
peer.getOutputStream(), bufferSize));
new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
verifyChecksum, cachingStrategy);

View File

@ -129,7 +129,7 @@ private BlockReader createBlockReader(long offsetInBlock) {
return BlockReaderRemote.newBlockReader(
"dummy", block, blockToken, offsetInBlock,
block.getNumBytes() - offsetInBlock, true, "", peer, source,
null, stripedReader.getCachingStrategy(), -1);
null, stripedReader.getCachingStrategy(), -1, conf);
} catch (IOException e) {
LOG.info("Exception while creating remote block reader, datanode {}",
source, e);

View File

@ -4098,6 +4098,18 @@
</description>
</property>
<property>
<name>dfs.client.block.reader.remote.buffer.size</name>
<value>8192</value>
<description>
The size, in bytes, of the output stream buffer used by a DFSClient remote read. The default value is 8192 (8KB).
The buffer holds only the read request parameters: block, blockToken, clientName, startOffset, len,
verifyChecksum and cachingStrategy.
It is recommended to adjust this value according to the workload; doing so can reduce unnecessary memory
usage and the frequency of garbage collection. A value of 512 might be reasonable.
</description>
</property>
<property>
<name>dfs.content-summary.limit</name>
<value>5000</value>