YARN-4546. ResourceManager crash due to scheduling opportunity overflow. Contributed by Jason Lowe.
This commit is contained in:
parent
2d16f40dab
commit
c1462a67ff
@ -1215,6 +1215,9 @@ Release 2.8.0 - UNRELEASED
|
||||
YARN-1382. Remove unusableRMNodesConcurrentSet (never used) in NodeListManager
|
||||
to get rid of memory leak. (Rohith Sharma K S via junping_du)
|
||||
|
||||
YARN-4546. ResourceManager crash due to scheduling opportunity overflow.
|
||||
(Jason Lowe via junping_du)
|
||||
|
||||
Release 2.7.3 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
@ -1270,6 +1273,9 @@ Release 2.7.3 - UNRELEASED
|
||||
|
||||
YARN-4510. Fix SLS startup failure caused by NPE. (Bibin A Chundatt via wangda)
|
||||
|
||||
YARN-4546. ResourceManager crash due to scheduling opportunity overflow.
|
||||
(Jason Lowe via junping_du)
|
||||
|
||||
Release 2.7.2 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
@ -2130,6 +2136,9 @@ Release 2.6.4 - UNRELEASED
|
||||
YARN-3697. FairScheduler: ContinuousSchedulingThread can fail to shutdown.
|
||||
(Zhihai Xu via kasha)
|
||||
|
||||
YARN-4546. ResourceManager crash due to scheduling opportunity overflow.
|
||||
(Jason Lowe via junping_du)
|
||||
|
||||
Release 2.6.3 - 2015-12-17
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -625,8 +625,10 @@ public synchronized int addMissedNonPartitionedRequestSchedulingOpportunity(
|
||||
|
||||
|
||||
public synchronized void addSchedulingOpportunity(Priority priority) {
|
||||
schedulingOpportunities.setCount(priority,
|
||||
schedulingOpportunities.count(priority) + 1);
|
||||
int count = schedulingOpportunities.count(priority);
|
||||
if (count < Integer.MAX_VALUE) {
|
||||
schedulingOpportunities.setCount(priority, count + 1);
|
||||
}
|
||||
}
|
||||
|
||||
public synchronized void subtractSchedulingOpportunity(Priority priority) {
|
||||
@ -661,6 +663,11 @@ public synchronized void resetSchedulingOpportunities(Priority priority,
|
||||
schedulingOpportunities.setCount(priority, 0);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
void setSchedulingOpportunities(Priority priority, int count) {
|
||||
schedulingOpportunities.setCount(priority, count);
|
||||
}
|
||||
|
||||
synchronized AggregateAppResourceUsage getRunningAggregateAppResourceUsage() {
|
||||
long currentTimeMillis = System.currentTimeMillis();
|
||||
// Don't walk the whole container list if the resources were computed
|
||||
|
@ -249,4 +249,26 @@ public void testAppPercentagesOnswitch() throws Exception {
|
||||
assertEquals(0.0f, app.getResourceUsageReport().getClusterUsagePercentage(),
|
||||
0.0f);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSchedulingOpportunityOverflow() throws Exception {
|
||||
ApplicationAttemptId attemptId = createAppAttemptId(0, 0);
|
||||
Queue queue = createQueue("test", null);
|
||||
RMContext rmContext = mock(RMContext.class);
|
||||
when(rmContext.getEpoch()).thenReturn(3L);
|
||||
SchedulerApplicationAttempt app = new SchedulerApplicationAttempt(
|
||||
attemptId, "user", queue, queue.getActiveUsersManager(), rmContext);
|
||||
Priority priority = Priority.newInstance(1);
|
||||
assertEquals(0, app.getSchedulingOpportunities(priority));
|
||||
app.addSchedulingOpportunity(priority);
|
||||
assertEquals(1, app.getSchedulingOpportunities(priority));
|
||||
// verify the count is capped at MAX_VALUE and does not overflow
|
||||
app.setSchedulingOpportunities(priority, Integer.MAX_VALUE - 1);
|
||||
assertEquals(Integer.MAX_VALUE - 1,
|
||||
app.getSchedulingOpportunities(priority));
|
||||
app.addSchedulingOpportunity(priority);
|
||||
assertEquals(Integer.MAX_VALUE, app.getSchedulingOpportunities(priority));
|
||||
app.addSchedulingOpportunity(priority);
|
||||
assertEquals(Integer.MAX_VALUE, app.getSchedulingOpportunities(priority));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user