HDFS-17242. Make congestion backoff time configurable. (#6227)

Reviewed-by: Xing Lin <xinglin@linkedin.com>
Reviewed-by: Ayush Saxena <ayushsaxena@apache.org>
Signed-off-by: Tao Li <tomscut@apache.org>
This commit is contained in:
hfutatzhanghb 2023-12-13 20:56:53 +08:00 committed by GitHub
parent 19b9e6a97b
commit 562c42c86a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 65 additions and 7 deletions

View File

@ -46,6 +46,7 @@
import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.BlockWrite; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.BlockWrite;
import org.apache.hadoop.hdfs.client.impl.DfsClientConf; import org.apache.hadoop.hdfs.client.impl.DfsClientConf;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
@ -528,9 +529,8 @@ boolean doWaitForRestart() {
// are congested // are congested
private final List<DatanodeInfo> congestedNodes = new ArrayList<>(); private final List<DatanodeInfo> congestedNodes = new ArrayList<>();
private final Map<DatanodeInfo, Integer> slowNodeMap = new HashMap<>(); private final Map<DatanodeInfo, Integer> slowNodeMap = new HashMap<>();
private static final int CONGESTION_BACKOFF_MEAN_TIME_IN_MS = 5000; private int congestionBackOffMeanTimeInMs;
private static final int CONGESTION_BACK_OFF_MAX_TIME_IN_MS = private int congestionBackOffMaxTimeInMs;
CONGESTION_BACKOFF_MEAN_TIME_IN_MS * 10;
private int lastCongestionBackoffTime; private int lastCongestionBackoffTime;
private int maxPipelineRecoveryRetries; private int maxPipelineRecoveryRetries;
private int markSlowNodeAsBadNodeThreshold; private int markSlowNodeAsBadNodeThreshold;
@ -564,6 +564,35 @@ private DataStreamer(HdfsFileStatus stat, ExtendedBlock block,
this.addBlockFlags = flags; this.addBlockFlags = flags;
this.maxPipelineRecoveryRetries = conf.getMaxPipelineRecoveryRetries(); this.maxPipelineRecoveryRetries = conf.getMaxPipelineRecoveryRetries();
this.markSlowNodeAsBadNodeThreshold = conf.getMarkSlowNodeAsBadNodeThreshold(); this.markSlowNodeAsBadNodeThreshold = conf.getMarkSlowNodeAsBadNodeThreshold();
congestionBackOffMeanTimeInMs = dfsClient.getConfiguration().getInt(
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MEAN_TIME,
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MEAN_TIME_DEFAULT);
congestionBackOffMaxTimeInMs = dfsClient.getConfiguration().getInt(
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MAX_TIME,
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MAX_TIME_DEFAULT);
if (congestionBackOffMeanTimeInMs <= 0) {
LOG.warn("Configuration: {} is not appropriate, using default value: {}",
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MEAN_TIME,
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MEAN_TIME_DEFAULT);
}
if (congestionBackOffMaxTimeInMs <= 0) {
LOG.warn("Configuration: {} is not appropriate, using default value: {}",
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MAX_TIME,
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MAX_TIME_DEFAULT);
}
if (congestionBackOffMaxTimeInMs < congestionBackOffMeanTimeInMs) {
LOG.warn("Configuration: {} can not less than {}, using their default values.",
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MAX_TIME,
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MEAN_TIME);
}
if (congestionBackOffMeanTimeInMs <= 0 || congestionBackOffMaxTimeInMs <= 0 ||
congestionBackOffMaxTimeInMs < congestionBackOffMeanTimeInMs) {
congestionBackOffMeanTimeInMs =
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MEAN_TIME_DEFAULT;
congestionBackOffMaxTimeInMs =
HdfsClientConfigKeys.DFS_CLIENT_CONGESTION_BACKOFF_MAX_TIME_DEFAULT;
}
} }
/** /**
@ -1998,10 +2027,10 @@ private void backOffIfNecessary() throws InterruptedException {
sb.append(' ').append(i); sb.append(' ').append(i);
} }
int range = Math.abs(lastCongestionBackoffTime * 3 - int range = Math.abs(lastCongestionBackoffTime * 3 -
CONGESTION_BACKOFF_MEAN_TIME_IN_MS); congestionBackOffMeanTimeInMs);
int base = Math.min(lastCongestionBackoffTime * 3, int base = Math.min(lastCongestionBackoffTime * 3,
CONGESTION_BACKOFF_MEAN_TIME_IN_MS); congestionBackOffMeanTimeInMs);
t = Math.min(CONGESTION_BACK_OFF_MAX_TIME_IN_MS, t = Math.min(congestionBackOffMaxTimeInMs,
(int)(base + Math.random() * range)); (int)(base + Math.random() * range));
lastCongestionBackoffTime = t; lastCongestionBackoffTime = t;
sb.append(" are congested. Backing off for ").append(t).append(" ms"); sb.append(" are congested. Backing off for ").append(t).append(" ms");

View File

@ -287,6 +287,15 @@ public interface HdfsClientConfigKeys {
"dfs.client.output.stream.uniq.default.key"; "dfs.client.output.stream.uniq.default.key";
String DFS_OUTPUT_STREAM_UNIQ_DEFAULT_KEY_DEFAULT = "DEFAULT"; String DFS_OUTPUT_STREAM_UNIQ_DEFAULT_KEY_DEFAULT = "DEFAULT";
String DFS_CLIENT_CONGESTION_BACKOFF_MEAN_TIME =
"dfs.client.congestion.backoff.mean.time";
int DFS_CLIENT_CONGESTION_BACKOFF_MEAN_TIME_DEFAULT = 5000;
String DFS_CLIENT_CONGESTION_BACKOFF_MAX_TIME =
"dfs.client.congestion.backoff.max.time";
int DFS_CLIENT_CONGESTION_BACKOFF_MAX_TIME_DEFAULT =
DFS_CLIENT_CONGESTION_BACKOFF_MEAN_TIME_DEFAULT * 10;
/** /**
* These are deprecated config keys to client code. * These are deprecated config keys to client code.
*/ */

View File

@ -6559,6 +6559,22 @@
If the namespace is DEFAULT, it's best to change this conf to other value. If the namespace is DEFAULT, it's best to change this conf to other value.
</description> </description>
</property> </property>
<property>
<name>dfs.client.congestion.backoff.mean.time</name>
<value>5000</value>
<description>
The mean time, in milliseconds, used to compute the
client's congestion backoff sleep time.
</description>
</property>
<property>
<name>dfs.client.congestion.backoff.max.time</name>
<value>50000</value>
<description>
The maximum time, in milliseconds, used as an upper
limit on the client's congestion backoff sleep time.
</description>
</property>
<property> <property>
<name>dfs.client.rbf.observer.read.enable</name> <name>dfs.client.rbf.observer.read.enable</name>
<value>false</value> <value>false</value>

View File

@ -275,6 +275,8 @@ private void runAdjustChunkBoundary(
public void testCongestionBackoff() throws IOException { public void testCongestionBackoff() throws IOException {
DfsClientConf dfsClientConf = mock(DfsClientConf.class); DfsClientConf dfsClientConf = mock(DfsClientConf.class);
DFSClient client = mock(DFSClient.class); DFSClient client = mock(DFSClient.class);
Configuration conf = mock(Configuration.class);
when(client.getConfiguration()).thenReturn(conf);
when(client.getConf()).thenReturn(dfsClientConf); when(client.getConf()).thenReturn(dfsClientConf);
when(client.getTracer()).thenReturn(FsTracer.get(new Configuration())); when(client.getTracer()).thenReturn(FsTracer.get(new Configuration()));
client.clientRunning = true; client.clientRunning = true;
@ -306,6 +308,8 @@ public void testCongestionBackoff() throws IOException {
public void testCongestionAckDelay() { public void testCongestionAckDelay() {
DfsClientConf dfsClientConf = mock(DfsClientConf.class); DfsClientConf dfsClientConf = mock(DfsClientConf.class);
DFSClient client = mock(DFSClient.class); DFSClient client = mock(DFSClient.class);
Configuration conf = mock(Configuration.class);
when(client.getConfiguration()).thenReturn(conf);
when(client.getConf()).thenReturn(dfsClientConf); when(client.getConf()).thenReturn(dfsClientConf);
when(client.getTracer()).thenReturn(FsTracer.get(new Configuration())); when(client.getTracer()).thenReturn(FsTracer.get(new Configuration()));
client.clientRunning = true; client.clientRunning = true;
@ -325,7 +329,7 @@ public void testCongestionAckDelay() {
ArrayList<DatanodeInfo> congestedNodes = (ArrayList<DatanodeInfo>) ArrayList<DatanodeInfo> congestedNodes = (ArrayList<DatanodeInfo>)
Whitebox.getInternalState(stream, "congestedNodes"); Whitebox.getInternalState(stream, "congestedNodes");
int backOffMaxTime = (int) int backOffMaxTime = (int)
Whitebox.getInternalState(stream, "CONGESTION_BACK_OFF_MAX_TIME_IN_MS"); Whitebox.getInternalState(stream, "congestionBackOffMaxTimeInMs");
DFSPacket[] packet = new DFSPacket[100]; DFSPacket[] packet = new DFSPacket[100];
AtomicBoolean isDelay = new AtomicBoolean(true); AtomicBoolean isDelay = new AtomicBoolean(true);