MAPREDUCE-7101. Add config parameter to allow JHS to always scan user dir irrespective of modTime. (Thomas Marquardt via asuresh)

This commit is contained in:
Arun Suresh 2018-06-12 15:36:52 -07:00
parent aeaf9fec62
commit 5670e89b2e
3 changed files with 23 additions and 3 deletions

View File

@ -62,6 +62,11 @@ public class JHAdminConfig {
/** Default history cleaner interval: one day, expressed in milliseconds. */
public static final long DEFAULT_MR_HISTORY_CLEANER_INTERVAL_MS =
// Uppercase 'L' suffix: a lowercase 'l' is easily misread as the digit 1.
1 * 24 * 60 * 60 * 1000L; // 1 day
/**
 * Configuration key: when set to true, always scan the user dir,
 * irrespective of the dir modification time.
 */
public static final String MR_HISTORY_ALWAYS_SCAN_USER_DIR =
MR_HISTORY_PREFIX + "always-scan-user-dir";
/** Default for {@link #MR_HISTORY_ALWAYS_SCAN_USER_DIR}: disabled. */
public static final boolean DEFAULT_MR_HISTORY_ALWAYS_SCAN_USER_DIR =
false;
/** The number of threads to handle client API requests.*/
public static final String MR_HISTORY_CLIENT_THREAD_COUNT =

View File

@ -1774,6 +1774,15 @@
</description>
</property>
<property>
<name>mapreduce.jobhistory.always-scan-user-dir</name>
<value>false</value>
<description>Some Cloud FileSystems do not currently update the
modification time of directories. To support these filesystems, this
configuration value should be set to 'true' so that the user directory
is always scanned, irrespective of its modification time.
</description>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>${yarn.app.mapreduce.am.staging-dir}/history/done</value>

View File

@ -324,7 +324,13 @@ public synchronized void scanIfNeeded(FileStatus fs) {
// so we need to have additional check.
// Note: modTime (X second Y millisecond) could be cast to X second or
// X+1 second.
if (modTime != newModTime
// MAPREDUCE-7101: Some Cloud FileSystems do not currently update the
// modification time of directories. For these, we scan every time if
// the 'alwaysScan' is true.
boolean alwaysScan = conf.getBoolean(
JHAdminConfig.MR_HISTORY_ALWAYS_SCAN_USER_DIR,
JHAdminConfig.DEFAULT_MR_HISTORY_ALWAYS_SCAN_USER_DIR);
if (alwaysScan || modTime != newModTime
|| (scanTime/1000) == (modTime/1000)
|| (scanTime/1000 + 1) == (modTime/1000)) {
// reset scanTime before scanning happens