From 88c4d4a7140b29a409e2ee707f41db5900046843 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Thu, 14 Nov 2013 18:09:14 +0000 Subject: [PATCH] MAPREDUCE-5616. MR Client-AppMaster RPC max retries on socket timeout is too high. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1542001 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../java/org/apache/hadoop/mapreduce/MRJobConfig.java | 10 +++++++++- .../src/main/resources/mapred-default.xml | 7 +++++++ .../apache/hadoop/mapred/ClientServiceDelegate.java | 4 ++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 0bff2f228e..b7aeb591e7 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -217,6 +217,9 @@ Release 2.3.0 - UNRELEASED MAPREDUCE-5610. TestSleepJob fails in jdk7 (Jonathan Eagles via jlowe) + MAPREDUCE-5616. MR Client-AppMaster RPC max retries on socket timeout is too + high. (cnauroth) + Release 2.2.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 76097de896..2622ec5da6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -357,13 +357,21 @@ public interface MRJobConfig { public static final String MR_AM_PREFIX = MR_PREFIX + "am."; - /** The number of client retires to the AM - before reconnecting to the RM + /** The number of client retries to the AM - before reconnecting to the RM * to fetch Application State. */ public static final String MR_CLIENT_TO_AM_IPC_MAX_RETRIES = MR_PREFIX + "client-am.ipc.max-retries"; public static final int DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES = 3; + /** The number of client retries on socket timeouts to the AM - before + * reconnecting to the RM to fetch Application Status. + */ + public static final String MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS = + MR_PREFIX + "yarn.app.mapreduce.client-am.ipc.max-retries-on-timeouts"; + public static final int + DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS = 3; + /** * The number of client retries to the RM/HS before throwing exception. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 4a228820b2..fcaa275cc1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1010,6 +1010,13 @@ to the RM to fetch Application Status. + + yarn.app.mapreduce.client-am.ipc.max-retries-on-timeouts + 3 + The number of client retries on socket timeouts to the AM - before + reconnecting to the RM to fetch Application Status. + + yarn.app.mapreduce.client.max-retries 3 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java index d9dd777efa..c3eee2cfdc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java @@ -107,6 +107,10 @@ public ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES, MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES)); + this.conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS, + MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS)); this.rm = rm; this.jobId = jobId; this.historyServerProxy = historyServerProxy;