diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
index 97a8472b53..d1ca63d8ab 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
@@ -148,6 +148,9 @@ public interface HdfsClientConfigKeys {
       "dfs.client.key.provider.cache.expiry";
   long DFS_CLIENT_KEY_PROVIDER_CACHE_EXPIRY_DEFAULT =
       TimeUnit.DAYS.toMillis(10); // 10 days
+  String DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_KEY =
+      "dfs.client.block.reader.remote.buffer.size";
+  int DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_DEFAULT = 8192;
 
   String DFS_DATANODE_KERBEROS_PRINCIPAL_KEY =
       "dfs.datanode.kerberos.principal";
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java
index 88b168621d..2109e6e1b7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java
@@ -855,7 +855,7 @@ private BlockReader getRemoteBlockReader(Peer peer) throws IOException {
         fileName, block, token, startOffset, length,
         verifyChecksum, clientName, peer, datanode,
         clientContext.getPeerCache(), cachingStrategy,
-        networkDistance);
+        networkDistance, configuration);
   }
 
   @Override
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderRemote.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderRemote.java
index ea1baed45d..0f2f54c28e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderRemote.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderRemote.java
@@ -29,6 +29,7 @@
 import java.util.UUID;
 
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ReadOption;
 import org.apache.hadoop.hdfs.BlockReader;
 import org.apache.hadoop.hdfs.PeerCache;
@@ -55,6 +56,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_DEFAULT;
+import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_KEY;
+
 /**
  * This is a wrapper around connection to datanode
  * and understands checksum, offset etc.
@@ -391,10 +395,13 @@ public static BlockReader newBlockReader(String file,
       Peer peer, DatanodeID datanodeID,
       PeerCache peerCache,
       CachingStrategy cachingStrategy,
-      int networkDistance) throws IOException {
+      int networkDistance, Configuration configuration) throws IOException {
     // in and out will be closed when sock is closed (by the caller)
+    int bufferSize = configuration.getInt(
+        DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_KEY,
+        DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_DEFAULT);
     final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
-        peer.getOutputStream()));
+        peer.getOutputStream(), bufferSize));
     new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
         verifyChecksum, cachingStrategy);
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReader.java
index 0db8a6f499..4dc51c9916 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReader.java
@@ -129,7 +129,7 @@ private BlockReader createBlockReader(long offsetInBlock) {
       return BlockReaderRemote.newBlockReader(
           "dummy", block, blockToken, offsetInBlock,
           block.getNumBytes() - offsetInBlock, true, "", peer, source,
-          null, stripedReader.getCachingStrategy(), -1);
+          null, stripedReader.getCachingStrategy(), -1, conf);
     } catch (IOException e) {
       LOG.info("Exception while creating remote block reader, datanode {}",
           source, e);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 63a99d8e66..d34e4cd561 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -4098,6 +4098,18 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.client.block.reader.remote.buffer.size</name>
+  <value>8192</value>
+  <description>
+    The output stream buffer size of a DFSClient remote read. The default is 8KB. The buffer includes
+    only a few request parameters: block, blockToken, clientName, startOffset, len, verifyChecksum and
+    cachingStrategy.
+    It is recommended to adjust the value according to the workload, which can reduce unnecessary
+    memory usage and the frequency of garbage collection. A value of 512 might be reasonable.
+  </description>
+</property>
+
 <property>
   <name>dfs.content-summary.limit</name>
   <value>5000</value>
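
For reference, a minimal client-side sketch of how the new key could be tuned once this patch is applied. The NameNode URI, the file path, and the 512-byte value are illustrative assumptions only; the key name and its 8192-byte default come from the patch above.

// Sketch: lowering dfs.client.block.reader.remote.buffer.size for a read-heavy client.
// hdfs://namenode:8020, /tmp/example, and the 512-byte value are assumptions for illustration.
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;

public class RemoteBufferSizeExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // Shrink the request buffer used by BlockReaderRemote#newBlockReader
    // from the 8192-byte default down to 512 bytes.
    conf.setInt(
        HdfsClientConfigKeys.DFS_CLIENT_BLOCK_READER_REMOTE_BUFFER_SIZE_KEY, 512);
    try (FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf);
         FSDataInputStream in = fs.open(new Path("/tmp/example"))) {
      // A read served by a remote DataNode now allocates the smaller
      // BufferedOutputStream when it sends the readBlock request.
      in.read();
    }
  }
}

The same value can also be set in hdfs-site.xml. Since the buffer only carries the readBlock request fields listed in the hdfs-default.xml description, a few hundred bytes is usually enough, which is why the description suggests 512 for workloads that open many remote block readers.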