YARN-3943. Use separate threshold configurations for disk-full detection and disk-not-full detection. Contributed by Zhihai Xu
This commit is contained in:
parent
118a35bc2e
commit
8d226225d0
@ -492,6 +492,9 @@ Release 2.8.0 - UNRELEASED
|
||||
YARN-4215. RMNodeLabels Manager Need to verify and replace node labels for the
|
||||
only modified Node Label Mappings in the request. (Naganarasimha G R via wangda)
|
||||
|
||||
YARN-3943. Use separate threshold configurations for disk-full detection
|
||||
and disk-not-full detection. (Zhihai Xu via jlowe)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
YARN-3339. TestDockerContainerExecutor should pull a single image and not
|
||||
|
@ -1059,6 +1059,18 @@ private static void addDeprecatedKeys() {
|
||||
public static final float DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
|
||||
90.0F;
|
||||
|
||||
/**
|
||||
* The low threshold percentage of disk space used when an offline disk is
|
||||
* marked as online. Values can range from 0.0 to 100.0. The value shouldn't
|
||||
* be more than NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE. If its value is
|
||||
* more than NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE or not set, it will be
|
||||
* set to the same value as NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE.
|
||||
* This applies to nm-local-dirs and nm-log-dirs.
|
||||
*/
|
||||
public static final String NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE =
|
||||
NM_DISK_HEALTH_CHECK_PREFIX +
|
||||
"disk-utilization-watermark-low-per-disk-percentage";
|
||||
|
||||
/**
|
||||
* The minimum space that must be available on a local dir for it to be used.
|
||||
* This applies to nm-local-dirs and nm-log-dirs.
|
||||
|
@ -1317,6 +1317,17 @@
|
||||
<value>90.0</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>The low threshold percentage of disk space used when a bad disk is
|
||||
marked as good. Values can range from 0.0 to 100.0. This applies to
|
||||
yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs.
|
||||
Note that if its value is more than
|
||||
yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage or not set, it will be set to the same value as
|
||||
yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage.</description>
|
||||
<name>yarn.nodemanager.disk-health-checker.disk-utilization-watermark-low-per-disk-percentage</name>
|
||||
<value></value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>The minimum space that must be available on a disk for
|
||||
it to be used. This applies to yarn.nodemanager.local-dirs and
|
||||
|
@ -39,6 +39,8 @@
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.util.DiskChecker;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
/**
|
||||
* Manages a list of local storage directories.
|
||||
*/
|
||||
@ -89,7 +91,8 @@ static List<String> concat(List<String> l1, List<String> l2) {
|
||||
|
||||
private int numFailures;
|
||||
|
||||
private float diskUtilizationPercentageCutoff;
|
||||
private float diskUtilizationPercentageCutoffHigh;
|
||||
private float diskUtilizationPercentageCutoffLow;
|
||||
private long diskUtilizationSpaceCutoff;
|
||||
|
||||
private int goodDirsDiskUtilizationPercentage;
|
||||
@ -103,7 +106,7 @@ static List<String> concat(List<String> l1, List<String> l2) {
|
||||
* directories to be monitored
|
||||
*/
|
||||
public DirectoryCollection(String[] dirs) {
|
||||
this(dirs, 100.0F, 0);
|
||||
this(dirs, 100.0F, 100.0F, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -119,7 +122,7 @@ public DirectoryCollection(String[] dirs) {
|
||||
*
|
||||
*/
|
||||
public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
|
||||
this(dirs, utilizationPercentageCutOff, 0);
|
||||
this(dirs, utilizationPercentageCutOff, utilizationPercentageCutOff, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -134,7 +137,7 @@ public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
|
||||
*
|
||||
*/
|
||||
public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
|
||||
this(dirs, 100.0F, utilizationSpaceCutOff);
|
||||
this(dirs, 100.0F, 100.0F, utilizationSpaceCutOff);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -145,25 +148,29 @@ public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
|
||||
*
|
||||
* @param dirs
|
||||
* directories to be monitored
|
||||
* @param utilizationPercentageCutOff
|
||||
* @param utilizationPercentageCutOffHigh
|
||||
* percentage of disk that can be used before the dir is taken out of
|
||||
* the good dirs list
|
||||
* @param utilizationPercentageCutOffLow
|
||||
* percentage of disk that can be used when the dir is moved from
|
||||
* the bad dirs list to the good dirs list
|
||||
* @param utilizationSpaceCutOff
|
||||
* minimum space, in MB, that must be available on the disk for the
|
||||
* dir to be marked as good
|
||||
*
|
||||
*/
|
||||
public DirectoryCollection(String[] dirs,
|
||||
float utilizationPercentageCutOff,
|
||||
float utilizationPercentageCutOffHigh,
|
||||
float utilizationPercentageCutOffLow,
|
||||
long utilizationSpaceCutOff) {
|
||||
localDirs = new CopyOnWriteArrayList<String>(dirs);
|
||||
errorDirs = new CopyOnWriteArrayList<String>();
|
||||
fullDirs = new CopyOnWriteArrayList<String>();
|
||||
|
||||
diskUtilizationPercentageCutoff =
|
||||
utilizationPercentageCutOff < 0.0F ? 0.0F
|
||||
: (utilizationPercentageCutOff > 100.0F ? 100.0F
|
||||
: utilizationPercentageCutOff);
|
||||
diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
|
||||
utilizationPercentageCutOffHigh));
|
||||
diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
|
||||
diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
|
||||
diskUtilizationSpaceCutoff =
|
||||
utilizationSpaceCutOff < 0 ? 0 : utilizationSpaceCutOff;
|
||||
|
||||
@ -254,7 +261,8 @@ synchronized boolean checkDirs() {
|
||||
List<String> allLocalDirs =
|
||||
DirectoryCollection.concat(localDirs, failedDirs);
|
||||
|
||||
Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs);
|
||||
Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs,
|
||||
preCheckGoodDirs);
|
||||
|
||||
localDirs.clear();
|
||||
errorDirs.clear();
|
||||
@ -314,7 +322,8 @@ synchronized boolean checkDirs() {
|
||||
return setChanged;
|
||||
}
|
||||
|
||||
Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
|
||||
Map<String, DiskErrorInformation> testDirs(List<String> dirs,
|
||||
Set<String> goodDirs) {
|
||||
HashMap<String, DiskErrorInformation> ret =
|
||||
new HashMap<String, DiskErrorInformation>();
|
||||
for (final String dir : dirs) {
|
||||
@ -322,7 +331,10 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
|
||||
try {
|
||||
File testDir = new File(dir);
|
||||
DiskChecker.checkDir(testDir);
|
||||
if (isDiskUsageOverPercentageLimit(testDir)) {
|
||||
float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ?
|
||||
diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
|
||||
if (isDiskUsageOverPercentageLimit(testDir,
|
||||
diskUtilizationPercentageCutoff)) {
|
||||
msg =
|
||||
"used space above threshold of "
|
||||
+ diskUtilizationPercentageCutoff
|
||||
@ -374,7 +386,8 @@ private void verifyDirUsingMkdir(File dir) throws IOException {
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isDiskUsageOverPercentageLimit(File dir) {
|
||||
private boolean isDiskUsageOverPercentageLimit(File dir,
|
||||
float diskUtilizationPercentageCutoff) {
|
||||
float freePercentage =
|
||||
100 * (dir.getUsableSpace() / (float) dir.getTotalSpace());
|
||||
float usedPercentage = 100.0F - freePercentage;
|
||||
@ -403,16 +416,23 @@ private void createDir(FileContext localFs, Path dir, FsPermission perm)
|
||||
}
|
||||
}
|
||||
|
||||
public float getDiskUtilizationPercentageCutoff() {
|
||||
return diskUtilizationPercentageCutoff;
|
||||
@VisibleForTesting
|
||||
float getDiskUtilizationPercentageCutoffHigh() {
|
||||
return diskUtilizationPercentageCutoffHigh;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
float getDiskUtilizationPercentageCutoffLow() {
|
||||
return diskUtilizationPercentageCutoffLow;
|
||||
}
|
||||
|
||||
public void setDiskUtilizationPercentageCutoff(
|
||||
float diskUtilizationPercentageCutoff) {
|
||||
this.diskUtilizationPercentageCutoff =
|
||||
diskUtilizationPercentageCutoff < 0.0F ? 0.0F
|
||||
: (diskUtilizationPercentageCutoff > 100.0F ? 100.0F
|
||||
: diskUtilizationPercentageCutoff);
|
||||
float utilizationPercentageCutOffHigh,
|
||||
float utilizationPercentageCutOffLow) {
|
||||
diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
|
||||
utilizationPercentageCutOffHigh));
|
||||
diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
|
||||
diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
|
||||
}
|
||||
|
||||
public long getDiskUtilizationSpaceCutoff() {
|
||||
|
@ -114,22 +114,40 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||
private final class MonitoringTimerTask extends TimerTask {
|
||||
|
||||
public MonitoringTimerTask(Configuration conf) throws YarnRuntimeException {
|
||||
float maxUsableSpacePercentagePerDisk =
|
||||
float highUsableSpacePercentagePerDisk =
|
||||
conf.getFloat(
|
||||
YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
|
||||
YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE);
|
||||
float lowUsableSpacePercentagePerDisk =
|
||||
conf.getFloat(
|
||||
YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE,
|
||||
highUsableSpacePercentagePerDisk);
|
||||
if (lowUsableSpacePercentagePerDisk > highUsableSpacePercentagePerDisk) {
|
||||
LOG.warn("Using " + YarnConfiguration.
|
||||
NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE + " as " +
|
||||
YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
|
||||
", because " + YarnConfiguration.
|
||||
NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
|
||||
" is not configured properly.");
|
||||
lowUsableSpacePercentagePerDisk = highUsableSpacePercentagePerDisk;
|
||||
}
|
||||
long minFreeSpacePerDiskMB =
|
||||
conf.getLong(YarnConfiguration.NM_MIN_PER_DISK_FREE_SPACE_MB,
|
||||
YarnConfiguration.DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB);
|
||||
localDirs =
|
||||
new DirectoryCollection(
|
||||
validatePaths(conf
|
||||
.getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
|
||||
maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
|
||||
validatePaths(conf
|
||||
.getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
|
||||
highUsableSpacePercentagePerDisk,
|
||||
lowUsableSpacePercentagePerDisk,
|
||||
minFreeSpacePerDiskMB);
|
||||
logDirs =
|
||||
new DirectoryCollection(
|
||||
validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
|
||||
maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
|
||||
validatePaths(conf
|
||||
.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
|
||||
highUsableSpacePercentagePerDisk,
|
||||
lowUsableSpacePercentagePerDisk,
|
||||
minFreeSpacePerDiskMB);
|
||||
|
||||
String local = conf.get(YarnConfiguration.NM_LOCAL_DIRS);
|
||||
conf.set(NM_GOOD_LOCAL_DIRS,
|
||||
|
@ -152,7 +152,7 @@ public void testDiskSpaceUtilizationLimit() throws IOException {
|
||||
// no good dirs
|
||||
Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
|
||||
|
||||
dc = new DirectoryCollection(dirs, 100.0F, 0);
|
||||
dc = new DirectoryCollection(dirs, 100.0F, 100.0F, 0);
|
||||
utilizedSpacePerc =
|
||||
(int)((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
|
||||
testDir.getTotalSpace());
|
||||
@ -168,18 +168,28 @@ public void testDiskSpaceUtilizationLimit() throws IOException {
|
||||
public void testDiskLimitsCutoffSetters() throws IOException {
|
||||
|
||||
String[] dirs = { "dir" };
|
||||
DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 100);
|
||||
DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 0.0F, 100);
|
||||
float testValue = 57.5F;
|
||||
float delta = 0.1F;
|
||||
dc.setDiskUtilizationPercentageCutoff(testValue);
|
||||
Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoff(),
|
||||
delta);
|
||||
dc.setDiskUtilizationPercentageCutoff(testValue, 50.0F);
|
||||
Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoffHigh(),
|
||||
delta);
|
||||
Assert.assertEquals(50.0F, dc.getDiskUtilizationPercentageCutoffLow(),
|
||||
delta);
|
||||
|
||||
testValue = -57.5F;
|
||||
dc.setDiskUtilizationPercentageCutoff(testValue);
|
||||
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
|
||||
dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
|
||||
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
|
||||
delta);
|
||||
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
|
||||
delta);
|
||||
|
||||
testValue = 157.5F;
|
||||
dc.setDiskUtilizationPercentageCutoff(testValue);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
|
||||
dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
|
||||
delta);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
|
||||
delta);
|
||||
|
||||
long spaceValue = 57;
|
||||
dc.setDiskUtilizationSpaceCutoff(spaceValue);
|
||||
@ -200,7 +210,7 @@ public void testFailedDisksBecomingGoodAgain() throws Exception {
|
||||
Assert.assertEquals(1, dc.getFailedDirs().size());
|
||||
Assert.assertEquals(1, dc.getFullDirs().size());
|
||||
|
||||
dc.setDiskUtilizationPercentageCutoff(100.0F);
|
||||
dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
|
||||
dc.checkDirs();
|
||||
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||
@ -236,27 +246,45 @@ public void testConstructors() {
|
||||
String[] dirs = { "dir" };
|
||||
float delta = 0.1F;
|
||||
DirectoryCollection dc = new DirectoryCollection(dirs);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
|
||||
delta);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
|
||||
delta);
|
||||
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
|
||||
|
||||
dc = new DirectoryCollection(dirs, 57.5F);
|
||||
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
|
||||
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
|
||||
delta);
|
||||
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffLow(),
|
||||
delta);
|
||||
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
|
||||
|
||||
dc = new DirectoryCollection(dirs, 57);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
|
||||
delta);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
|
||||
delta);
|
||||
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoff());
|
||||
|
||||
dc = new DirectoryCollection(dirs, 57.5F, 67);
|
||||
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
|
||||
dc = new DirectoryCollection(dirs, 57.5F, 50.5F, 67);
|
||||
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
|
||||
delta);
|
||||
Assert.assertEquals(50.5F, dc.getDiskUtilizationPercentageCutoffLow(),
|
||||
delta);
|
||||
Assert.assertEquals(67, dc.getDiskUtilizationSpaceCutoff());
|
||||
|
||||
dc = new DirectoryCollection(dirs, -57.5F, -67);
|
||||
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
|
||||
dc = new DirectoryCollection(dirs, -57.5F, -57.5F, -67);
|
||||
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
|
||||
delta);
|
||||
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
|
||||
delta);
|
||||
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
|
||||
|
||||
dc = new DirectoryCollection(dirs, 157.5F, -67);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
|
||||
dc = new DirectoryCollection(dirs, 157.5F, 157.5F, -67);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
|
||||
delta);
|
||||
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
|
||||
delta);
|
||||
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
|
||||
}
|
||||
|
||||
@ -288,7 +316,7 @@ public void testDirsChangeListener() {
|
||||
Assert.assertEquals(listener3.num, 1);
|
||||
|
||||
dc.deregisterDirsChangeListener(listener2);
|
||||
dc.setDiskUtilizationPercentageCutoff(100.0F);
|
||||
dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
|
||||
dc.checkDirs();
|
||||
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||
Assert.assertEquals(listener1.num, 3);
|
||||
|
Loading…
Reference in New Issue
Block a user