diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 0fe5b7c2fc..fa73ba1543 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -432,6 +432,9 @@ Release 2.8.0 - UNRELEASED
NEW FEATURES
+ HADOOP-11226. Add a configuration to set ipc.Client's traffic class with
+ IPTOS_LOWDELAY|IPTOS_RELIABILITY. (Gopal V via ozawa)
+
IMPROVEMENTS
OPTIMIZATIONS
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
index 00c8d78bf7..470b4d0921 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
@@ -206,8 +206,12 @@ public class CommonConfigurationKeysPublic {
/** See core-default.xml */
public static final String IPC_CLIENT_TCPNODELAY_KEY =
"ipc.client.tcpnodelay";
- /** Defalt value for IPC_CLIENT_TCPNODELAY_KEY */
+ /** Default value for IPC_CLIENT_TCPNODELAY_KEY */
public static final boolean IPC_CLIENT_TCPNODELAY_DEFAULT = true;
+ /** Config key: mark IPC client sockets with low-latency QoS bits */
+ public static final String IPC_CLIENT_LOW_LATENCY = "ipc.client.low-latency";
+ /** Default value of IPC_CLIENT_LOW_LATENCY */
+ public static final boolean IPC_CLIENT_LOW_LATENCY_DEFAULT = false;
/** See core-default.xml */
public static final String IPC_SERVER_LISTEN_QUEUE_SIZE_KEY =
"ipc.server.listen.queue.size";
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
index 32558bcb87..97b715bcfa 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
@@ -384,7 +384,8 @@ private class Connection extends Thread {
private final RetryPolicy connectionRetryPolicy;
private final int maxRetriesOnSasl;
private int maxRetriesOnSocketTimeouts;
- private boolean tcpNoDelay; // if T then disable Nagle's Algorithm
+ private final boolean tcpNoDelay; // if T then disable Nagle's Algorithm
+ private final boolean tcpLowLatency; // if T then use low-delay QoS
private boolean doPing; //do we need to send ping message
private int pingInterval; // how often sends ping to the server in msecs
private ByteArrayOutputStream pingRequest; // ping message
@@ -413,6 +414,7 @@ public Connection(ConnectionId remoteId, int serviceClass) throws IOException {
this.maxRetriesOnSasl = remoteId.getMaxRetriesOnSasl();
this.maxRetriesOnSocketTimeouts = remoteId.getMaxRetriesOnSocketTimeouts();
this.tcpNoDelay = remoteId.getTcpNoDelay();
+ this.tcpLowLatency = remoteId.getTcpLowLatency();
this.doPing = remoteId.getDoPing();
if (doPing) {
// construct a RPC header with the callId as the ping callId
@@ -585,6 +587,20 @@ private synchronized void setupConnection() throws IOException {
this.socket.setTcpNoDelay(tcpNoDelay);
this.socket.setKeepAlive(true);
+ if (tcpLowLatency) {
+ /*
+ * This allows intermediate switches to shape IPC traffic
+ * differently from Shuffle/HDFS DataStreamer traffic.
+ *
+ * IPTOS_RELIABILITY (0x04) | IPTOS_LOWDELAY (0x10)
+ *
+ * Prefer response latency first, then connect() speed, over net
+ * throughput (connection time = 1, latency = 2, bandwidth = 0).
+ */
+ this.socket.setTrafficClass(0x04 | 0x10);
+ this.socket.setPerformancePreferences(1, 2, 0);
+ }
+
/*
* Bind the socket to the host specified in the principal name of the
* client, to ensure Server matching address of the client connection
@@ -1549,6 +1565,7 @@ public static class ConnectionId {
// the max. no. of retries for socket connections on time out exceptions
private final int maxRetriesOnSocketTimeouts;
private final boolean tcpNoDelay; // if T then disable Nagle's Algorithm
+ private final boolean tcpLowLatency; // if T then use low-delay QoS
private final boolean doPing; //do we need to send ping message
private final int pingInterval; // how often sends ping to the server in msecs
private String saslQop; // here for testing
@@ -1575,6 +1592,10 @@ public static class ConnectionId {
this.tcpNoDelay = conf.getBoolean(
CommonConfigurationKeysPublic.IPC_CLIENT_TCPNODELAY_KEY,
CommonConfigurationKeysPublic.IPC_CLIENT_TCPNODELAY_DEFAULT);
+ this.tcpLowLatency = conf.getBoolean(
+ CommonConfigurationKeysPublic.IPC_CLIENT_LOW_LATENCY,
+ CommonConfigurationKeysPublic.IPC_CLIENT_LOW_LATENCY_DEFAULT
+ );
this.doPing = conf.getBoolean(
CommonConfigurationKeys.IPC_CLIENT_PING_KEY,
CommonConfigurationKeys.IPC_CLIENT_PING_DEFAULT);
@@ -1610,11 +1631,17 @@ public int getMaxRetriesOnSasl() {
public int getMaxRetriesOnSocketTimeouts() {
return maxRetriesOnSocketTimeouts;
}
-
+
+ /** Whether Nagle's algorithm is disabled (TCP_NODELAY) for this connection. */
boolean getTcpNoDelay() {
return tcpNoDelay;
}
-
+
+ /** Whether low-latency QoS bits are set on this connection's TCP socket. */
+ boolean getTcpLowLatency() {
+ return tcpLowLatency;
+ }
+
boolean getDoPing() {
return doPing;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 74390d815d..1d531dffef 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -977,6 +977,20 @@ for ldap providers in the same way as above does.
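+<property>
+  <name>ipc.client.tcpnodelay</name>
+  <value>true</value>
+  <description>Use TCP_NODELAY flag to bypass Nagle's algorithm transmission delays.
+  </description>
+</property>
+
+<property>
+  <name>ipc.client.low-latency</name>
+  <value>false</value>
+  <description>Use low-latency QoS markers for IPC connections.
+  </description>
+</property>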
+
ipc.server.listen.queue.size
128