YARN-3943. Use separate threshold configurations for disk-full detection and disk-not-full detection. Contributed by Zhihai Xu

This commit is contained in:
Jason Lowe 2015-10-08 22:25:34 +00:00
parent 118a35bc2e
commit 8d226225d0
6 changed files with 142 additions and 50 deletions

View File

@ -492,6 +492,9 @@ Release 2.8.0 - UNRELEASED
YARN-4215. RMNodeLabels Manager Need to verify and replace node labels for the
only modified Node Label Mappings in the request. (Naganarasimha G R via wangda)
YARN-3943. Use separate threshold configurations for disk-full detection
and disk-not-full detection. (Zhihai Xu via jlowe)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -1059,6 +1059,18 @@ private static void addDeprecatedKeys() {
public static final float DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
90.0F;
/**
* The low threshold percentage of disk space used when an offline disk is
* marked as online. Values can range from 0.0 to 100.0. The value shouldn't
* be more than NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE. If its value is
* more than NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE or not set, it will be
* set to the same value as NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE.
* This applies to nm-local-dirs and nm-log-dirs.
*/
public static final String NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE =
NM_DISK_HEALTH_CHECK_PREFIX +
"disk-utilization-watermark-low-per-disk-percentage";
/**
* The minimum space that must be available on a local dir for it to be used.
* This applies to nm-local-dirs and nm-log-dirs.

View File

@ -1317,6 +1317,17 @@
<value>90.0</value>
</property>
<property>
<description>The low threshold percentage of disk space used when a bad disk is
marked as good. Values can range from 0.0 to 100.0. This applies to
yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs.
Note that if its value is more than
yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage
or not set, it will be set to the same value as
yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage.</description>
<name>yarn.nodemanager.disk-health-checker.disk-utilization-watermark-low-per-disk-percentage</name>
<value></value>
</property>
<property>
<description>The minimum space that must be available on a disk for
it to be used. This applies to yarn.nodemanager.local-dirs and

View File

@ -39,6 +39,8 @@
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.DiskChecker;
import com.google.common.annotations.VisibleForTesting;
/**
* Manages a list of local storage directories.
*/
@ -88,8 +90,9 @@ static List<String> concat(List<String> l1, List<String> l2) {
private List<String> fullDirs;
private int numFailures;
private float diskUtilizationPercentageCutoff;
private float diskUtilizationPercentageCutoffHigh;
private float diskUtilizationPercentageCutoffLow;
private long diskUtilizationSpaceCutoff;
private int goodDirsDiskUtilizationPercentage;
@ -103,7 +106,7 @@ static List<String> concat(List<String> l1, List<String> l2) {
* directories to be monitored
*/
public DirectoryCollection(String[] dirs) {
this(dirs, 100.0F, 0);
this(dirs, 100.0F, 100.0F, 0);
}
/**
@ -119,7 +122,7 @@ public DirectoryCollection(String[] dirs) {
*
*/
public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
this(dirs, utilizationPercentageCutOff, 0);
this(dirs, utilizationPercentageCutOff, utilizationPercentageCutOff, 0);
}
/**
@ -134,7 +137,7 @@ public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
*
*/
public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
this(dirs, 100.0F, utilizationSpaceCutOff);
this(dirs, 100.0F, 100.0F, utilizationSpaceCutOff);
}
/**
@ -145,25 +148,29 @@ public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
*
* @param dirs
* directories to be monitored
* @param utilizationPercentageCutOff
* @param utilizationPercentageCutOffHigh
* percentage of disk that can be used before the dir is taken out of
* the good dirs list
* @param utilizationPercentageCutOffLow
* percentage of disk that can be used when the dir is moved from
* the bad dirs list to the good dirs list
* @param utilizationSpaceCutOff
* minimum space, in MB, that must be available on the disk for the
* dir to be marked as good
*
*/
public DirectoryCollection(String[] dirs,
float utilizationPercentageCutOff,
public DirectoryCollection(String[] dirs,
float utilizationPercentageCutOffHigh,
float utilizationPercentageCutOffLow,
long utilizationSpaceCutOff) {
localDirs = new CopyOnWriteArrayList<String>(dirs);
errorDirs = new CopyOnWriteArrayList<String>();
fullDirs = new CopyOnWriteArrayList<String>();
diskUtilizationPercentageCutoff =
utilizationPercentageCutOff < 0.0F ? 0.0F
: (utilizationPercentageCutOff > 100.0F ? 100.0F
: utilizationPercentageCutOff);
diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
utilizationPercentageCutOffHigh));
diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
diskUtilizationSpaceCutoff =
utilizationSpaceCutOff < 0 ? 0 : utilizationSpaceCutOff;
@ -254,7 +261,8 @@ synchronized boolean checkDirs() {
List<String> allLocalDirs =
DirectoryCollection.concat(localDirs, failedDirs);
Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs);
Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs,
preCheckGoodDirs);
localDirs.clear();
errorDirs.clear();
@ -314,7 +322,8 @@ synchronized boolean checkDirs() {
return setChanged;
}
Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
Map<String, DiskErrorInformation> testDirs(List<String> dirs,
Set<String> goodDirs) {
HashMap<String, DiskErrorInformation> ret =
new HashMap<String, DiskErrorInformation>();
for (final String dir : dirs) {
@ -322,7 +331,10 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
try {
File testDir = new File(dir);
DiskChecker.checkDir(testDir);
if (isDiskUsageOverPercentageLimit(testDir)) {
float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ?
diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
if (isDiskUsageOverPercentageLimit(testDir,
diskUtilizationPercentageCutoff)) {
msg =
"used space above threshold of "
+ diskUtilizationPercentageCutoff
@ -374,7 +386,8 @@ private void verifyDirUsingMkdir(File dir) throws IOException {
}
}
private boolean isDiskUsageOverPercentageLimit(File dir) {
private boolean isDiskUsageOverPercentageLimit(File dir,
float diskUtilizationPercentageCutoff) {
float freePercentage =
100 * (dir.getUsableSpace() / (float) dir.getTotalSpace());
float usedPercentage = 100.0F - freePercentage;
@ -402,17 +415,24 @@ private void createDir(FileContext localFs, Path dir, FsPermission perm)
}
}
}
public float getDiskUtilizationPercentageCutoff() {
return diskUtilizationPercentageCutoff;
@VisibleForTesting
float getDiskUtilizationPercentageCutoffHigh() {
return diskUtilizationPercentageCutoffHigh;
}
@VisibleForTesting
float getDiskUtilizationPercentageCutoffLow() {
return diskUtilizationPercentageCutoffLow;
}
public void setDiskUtilizationPercentageCutoff(
float diskUtilizationPercentageCutoff) {
this.diskUtilizationPercentageCutoff =
diskUtilizationPercentageCutoff < 0.0F ? 0.0F
: (diskUtilizationPercentageCutoff > 100.0F ? 100.0F
: diskUtilizationPercentageCutoff);
float utilizationPercentageCutOffHigh,
float utilizationPercentageCutOffLow) {
diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
utilizationPercentageCutOffHigh));
diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
}
public long getDiskUtilizationSpaceCutoff() {

View File

@ -114,22 +114,40 @@ public class LocalDirsHandlerService extends AbstractService {
private final class MonitoringTimerTask extends TimerTask {
public MonitoringTimerTask(Configuration conf) throws YarnRuntimeException {
float maxUsableSpacePercentagePerDisk =
float highUsableSpacePercentagePerDisk =
conf.getFloat(
YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE);
float lowUsableSpacePercentagePerDisk =
conf.getFloat(
YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE,
highUsableSpacePercentagePerDisk);
if (lowUsableSpacePercentagePerDisk > highUsableSpacePercentagePerDisk) {
LOG.warn("Using " + YarnConfiguration.
NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE + " as " +
YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
", because " + YarnConfiguration.
NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
" is not configured properly.");
lowUsableSpacePercentagePerDisk = highUsableSpacePercentagePerDisk;
}
long minFreeSpacePerDiskMB =
conf.getLong(YarnConfiguration.NM_MIN_PER_DISK_FREE_SPACE_MB,
YarnConfiguration.DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB);
localDirs =
new DirectoryCollection(
validatePaths(conf
.getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
validatePaths(conf
.getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
highUsableSpacePercentagePerDisk,
lowUsableSpacePercentagePerDisk,
minFreeSpacePerDiskMB);
logDirs =
new DirectoryCollection(
validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
validatePaths(conf
.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
highUsableSpacePercentagePerDisk,
lowUsableSpacePercentagePerDisk,
minFreeSpacePerDiskMB);
String local = conf.get(YarnConfiguration.NM_LOCAL_DIRS);
conf.set(NM_GOOD_LOCAL_DIRS,

View File

@ -152,7 +152,7 @@ public void testDiskSpaceUtilizationLimit() throws IOException {
// no good dirs
Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
dc = new DirectoryCollection(dirs, 100.0F, 0);
dc = new DirectoryCollection(dirs, 100.0F, 100.0F, 0);
utilizedSpacePerc =
(int)((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
testDir.getTotalSpace());
@ -168,18 +168,28 @@ public void testDiskSpaceUtilizationLimit() throws IOException {
public void testDiskLimitsCutoffSetters() throws IOException {
String[] dirs = { "dir" };
DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 100);
DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 0.0F, 100);
float testValue = 57.5F;
float delta = 0.1F;
dc.setDiskUtilizationPercentageCutoff(testValue);
Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoff(),
delta);
dc.setDiskUtilizationPercentageCutoff(testValue, 50.0F);
Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(50.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
testValue = -57.5F;
dc.setDiskUtilizationPercentageCutoff(testValue);
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
testValue = 157.5F;
dc.setDiskUtilizationPercentageCutoff(testValue);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
long spaceValue = 57;
dc.setDiskUtilizationSpaceCutoff(spaceValue);
@ -200,7 +210,7 @@ public void testFailedDisksBecomingGoodAgain() throws Exception {
Assert.assertEquals(1, dc.getFailedDirs().size());
Assert.assertEquals(1, dc.getFullDirs().size());
dc.setDiskUtilizationPercentageCutoff(100.0F);
dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
dc.checkDirs();
Assert.assertEquals(1, dc.getGoodDirs().size());
Assert.assertEquals(0, dc.getFailedDirs().size());
@ -236,27 +246,45 @@ public void testConstructors() {
String[] dirs = { "dir" };
float delta = 0.1F;
DirectoryCollection dc = new DirectoryCollection(dirs);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
dc = new DirectoryCollection(dirs, 57.5F);
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
dc = new DirectoryCollection(dirs, 57);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoff());
dc = new DirectoryCollection(dirs, 57.5F, 67);
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
dc = new DirectoryCollection(dirs, 57.5F, 50.5F, 67);
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(50.5F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(67, dc.getDiskUtilizationSpaceCutoff());
dc = new DirectoryCollection(dirs, -57.5F, -67);
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
dc = new DirectoryCollection(dirs, -57.5F, -57.5F, -67);
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
dc = new DirectoryCollection(dirs, 157.5F, -67);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
dc = new DirectoryCollection(dirs, 157.5F, 157.5F, -67);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
}
@ -288,7 +316,7 @@ public void testDirsChangeListener() {
Assert.assertEquals(listener3.num, 1);
dc.deregisterDirsChangeListener(listener2);
dc.setDiskUtilizationPercentageCutoff(100.0F);
dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
dc.checkDirs();
Assert.assertEquals(1, dc.getGoodDirs().size());
Assert.assertEquals(listener1.num, 3);