YARN-3272. Surface container locality info in RM web UI (Jian He via wangda)
This commit is contained in:
parent
1004473aa6
commit
e17e5ba9d7
@ -348,6 +348,9 @@ Release 2.7.0 - UNRELEASED
|
||||
YARN-3281. Added RMStateStore to StateMachine visualization list.
|
||||
(Chengbing Liu via jianhe)
|
||||
|
||||
YARN-3272. Surface container locality info in RM web UI.
|
||||
(Jian He via wangda)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
YARN-2990. FairScheduler's delay-scheduling always waits for node-local and
|
||||
|
@ -62,6 +62,13 @@
|
||||
<Class name="~org\.apache\.hadoop\.yarn\.server\.resourcemanager\.rmapp\.attempt\.RMAppAttemptImpl.*" />
|
||||
<Bug pattern="BC_UNCONFIRMED_CAST" />
|
||||
</Match>
|
||||
<!--
  Child elements of a Match are ANDed, so every method/bug-pattern pair needs
  its own Match element; one Match listing two Method elements matches nothing.
-->
<!-- getLocalityStatistics returns the internal statistics array as-is. -->
<Match>
  <Class name="~org\.apache\.hadoop\.yarn\.server\.resourcemanager\.rmapp\.attempt\.RMAppAttemptMetrics" />
  <Method name="getLocalityStatistics" />
  <Bug pattern="EI_EXPOSE_REP" />
</Match>
<!-- incNumAllocatedContainers increments the volatile totalAllocatedContainers field. -->
<Match>
  <Class name="~org\.apache\.hadoop\.yarn\.server\.resourcemanager\.rmapp\.attempt\.RMAppAttemptMetrics" />
  <Method name="incNumAllocatedContainers"/>
  <Bug pattern="VO_VOLATILE_INCREMENT" />
</Match>
|
||||
<Match>
|
||||
<Class name="org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl$AppRejectedTransition" />
|
||||
<Bug pattern="BC_UNCONFIRMED_CAST" />
|
||||
|
@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
|
||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||
|
||||
public class RMAppAttemptMetrics {
|
||||
@ -49,6 +50,10 @@ public class RMAppAttemptMetrics {
|
||||
private AtomicLong finishedVcoreSeconds = new AtomicLong(0);
|
||||
private RMContext rmContext;
|
||||
|
||||
private int[][] localityStatistics =
|
||||
new int[NodeType.values().length][NodeType.values().length];
|
||||
private volatile int totalAllocatedContainers;
|
||||
|
||||
public RMAppAttemptMetrics(ApplicationAttemptId attemptId,
|
||||
RMContext rmContext) {
|
||||
this.attemptId = attemptId;
|
||||
@ -57,7 +62,7 @@ public class RMAppAttemptMetrics {
|
||||
this.writeLock = lock.writeLock();
|
||||
this.rmContext = rmContext;
|
||||
}
|
||||
|
||||
|
||||
public void updatePreemptionInfo(Resource resource, RMContainer container) {
|
||||
try {
|
||||
writeLock.lock();
|
||||
@ -126,4 +131,18 @@ public class RMAppAttemptMetrics {
|
||||
this.finishedMemorySeconds.addAndGet(finishedMemorySeconds);
|
||||
this.finishedVcoreSeconds.addAndGet(finishedVcoreSeconds);
|
||||
}
|
||||
|
||||
public void incNumAllocatedContainers(NodeType containerType,
|
||||
NodeType requestType) {
|
||||
localityStatistics[containerType.index][requestType.index]++;
|
||||
totalAllocatedContainers++;
|
||||
}
|
||||
|
||||
public int[][] getLocalityStatistics() {
|
||||
return this.localityStatistics;
|
||||
}
|
||||
|
||||
public int getTotalAllocatedContainers() {
|
||||
return this.totalAllocatedContainers;
|
||||
}
|
||||
}
|
||||
|
@ -22,7 +22,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
||||
* Resource classification.
|
||||
*/
|
||||
public enum NodeType {
  NODE_LOCAL(0), RACK_LOCAL(1), OFF_SWITCH(2);

  /**
   * Fixed position of this constant, used by callers as an array index.
   * Declared final so that the shared enum constants cannot be re-indexed
   * at runtime.
   */
  public final int index;

  /**
   * @param index array position assigned to this constant
   */
  private NodeType(int index) {
    this.index = index;
  }
}
|
||||
|
@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent;
|
||||
@ -78,7 +79,7 @@ public class SchedulerApplicationAttempt {
|
||||
private long lastVcoreSeconds = 0;
|
||||
|
||||
protected final AppSchedulingInfo appSchedulingInfo;
|
||||
|
||||
protected ApplicationAttemptId attemptId;
|
||||
protected Map<ContainerId, RMContainer> liveContainers =
|
||||
new HashMap<ContainerId, RMContainer>();
|
||||
protected final Map<Priority, Map<NodeId, RMContainer>> reservedContainers =
|
||||
@ -132,6 +133,7 @@ public class SchedulerApplicationAttempt {
|
||||
activeUsersManager, rmContext.getEpoch());
|
||||
this.queue = queue;
|
||||
this.pendingRelease = new HashSet<ContainerId>();
|
||||
this.attemptId = applicationAttemptId;
|
||||
if (rmContext.getRMApps() != null &&
|
||||
rmContext.getRMApps()
|
||||
.containsKey(applicationAttemptId.getApplicationId())) {
|
||||
@ -619,4 +621,15 @@ public class SchedulerApplicationAttempt {
|
||||
// schedulingOpportunities
|
||||
// lastScheduledContainer
|
||||
}
|
||||
|
||||
public void incNumAllocatedContainers(NodeType containerType,
|
||||
NodeType requestType) {
|
||||
RMAppAttempt attempt =
|
||||
rmContext.getRMApps().get(attemptId.getApplicationId())
|
||||
.getCurrentAppAttempt();
|
||||
if (attempt != null) {
|
||||
attempt.getRMAppAttemptMetrics().incNumAllocatedContainers(containerType,
|
||||
requestType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang.mutable.MutableObject;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
@ -1242,15 +1243,25 @@ public class LeafQueue extends AbstractCSQueue {
|
||||
RMContainer reservedContainer, boolean needToUnreserve) {
|
||||
Resource assigned = Resources.none();
|
||||
|
||||
NodeType requestType = null;
|
||||
MutableObject allocatedContainer = new MutableObject();
|
||||
// Data-local
|
||||
ResourceRequest nodeLocalResourceRequest =
|
||||
application.getResourceRequest(priority, node.getNodeName());
|
||||
if (nodeLocalResourceRequest != null) {
|
||||
assigned =
|
||||
assignNodeLocalContainers(clusterResource, nodeLocalResourceRequest,
|
||||
node, application, priority, reservedContainer, needToUnreserve);
|
||||
if (Resources.greaterThan(resourceCalculator, clusterResource,
|
||||
requestType = NodeType.NODE_LOCAL;
|
||||
assigned =
|
||||
assignNodeLocalContainers(clusterResource, nodeLocalResourceRequest,
|
||||
node, application, priority, reservedContainer, needToUnreserve,
|
||||
allocatedContainer);
|
||||
if (Resources.greaterThan(resourceCalculator, clusterResource,
|
||||
assigned, Resources.none())) {
|
||||
|
||||
//update locality statistics
|
||||
if (allocatedContainer.getValue() != null) {
|
||||
application.incNumAllocatedContainers(NodeType.NODE_LOCAL,
|
||||
requestType);
|
||||
}
|
||||
return new CSAssignment(assigned, NodeType.NODE_LOCAL);
|
||||
}
|
||||
}
|
||||
@ -1262,12 +1273,23 @@ public class LeafQueue extends AbstractCSQueue {
|
||||
if (!rackLocalResourceRequest.getRelaxLocality()) {
|
||||
return SKIP_ASSIGNMENT;
|
||||
}
|
||||
|
||||
assigned =
|
||||
assignRackLocalContainers(clusterResource, rackLocalResourceRequest,
|
||||
node, application, priority, reservedContainer, needToUnreserve);
|
||||
if (Resources.greaterThan(resourceCalculator, clusterResource,
|
||||
|
||||
if (requestType != NodeType.NODE_LOCAL) {
|
||||
requestType = NodeType.RACK_LOCAL;
|
||||
}
|
||||
|
||||
assigned =
|
||||
assignRackLocalContainers(clusterResource, rackLocalResourceRequest,
|
||||
node, application, priority, reservedContainer, needToUnreserve,
|
||||
allocatedContainer);
|
||||
if (Resources.greaterThan(resourceCalculator, clusterResource,
|
||||
assigned, Resources.none())) {
|
||||
|
||||
//update locality statistics
|
||||
if (allocatedContainer.getValue() != null) {
|
||||
application.incNumAllocatedContainers(NodeType.RACK_LOCAL,
|
||||
requestType);
|
||||
}
|
||||
return new CSAssignment(assigned, NodeType.RACK_LOCAL);
|
||||
}
|
||||
}
|
||||
@ -1279,11 +1301,21 @@ public class LeafQueue extends AbstractCSQueue {
|
||||
if (!offSwitchResourceRequest.getRelaxLocality()) {
|
||||
return SKIP_ASSIGNMENT;
|
||||
}
|
||||
if (requestType != NodeType.NODE_LOCAL
|
||||
&& requestType != NodeType.RACK_LOCAL) {
|
||||
requestType = NodeType.OFF_SWITCH;
|
||||
}
|
||||
|
||||
return new CSAssignment(assignOffSwitchContainers(clusterResource,
|
||||
offSwitchResourceRequest, node, application, priority,
|
||||
reservedContainer, needToUnreserve),
|
||||
NodeType.OFF_SWITCH);
|
||||
assigned =
|
||||
assignOffSwitchContainers(clusterResource, offSwitchResourceRequest,
|
||||
node, application, priority, reservedContainer, needToUnreserve,
|
||||
allocatedContainer);
|
||||
|
||||
// update locality statistics
|
||||
if (allocatedContainer.getValue() != null) {
|
||||
application.incNumAllocatedContainers(NodeType.OFF_SWITCH, requestType);
|
||||
}
|
||||
return new CSAssignment(assigned, NodeType.OFF_SWITCH);
|
||||
}
|
||||
|
||||
return SKIP_ASSIGNMENT;
|
||||
@ -1370,40 +1402,43 @@ public class LeafQueue extends AbstractCSQueue {
|
||||
private Resource assignNodeLocalContainers(Resource clusterResource,
|
||||
ResourceRequest nodeLocalResourceRequest, FiCaSchedulerNode node,
|
||||
FiCaSchedulerApp application, Priority priority,
|
||||
RMContainer reservedContainer, boolean needToUnreserve) {
|
||||
if (canAssign(application, priority, node, NodeType.NODE_LOCAL,
|
||||
RMContainer reservedContainer, boolean needToUnreserve,
|
||||
MutableObject allocatedContainer) {
|
||||
if (canAssign(application, priority, node, NodeType.NODE_LOCAL,
|
||||
reservedContainer)) {
|
||||
return assignContainer(clusterResource, node, application, priority,
|
||||
nodeLocalResourceRequest, NodeType.NODE_LOCAL, reservedContainer,
|
||||
needToUnreserve);
|
||||
needToUnreserve, allocatedContainer);
|
||||
}
|
||||
|
||||
return Resources.none();
|
||||
}
|
||||
|
||||
private Resource assignRackLocalContainers(Resource clusterResource,
|
||||
ResourceRequest rackLocalResourceRequest, FiCaSchedulerNode node,
|
||||
FiCaSchedulerApp application, Priority priority,
|
||||
RMContainer reservedContainer, boolean needToUnreserve) {
|
||||
if (canAssign(application, priority, node, NodeType.RACK_LOCAL,
|
||||
private Resource assignRackLocalContainers(
|
||||
Resource clusterResource, ResourceRequest rackLocalResourceRequest,
|
||||
FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority,
|
||||
RMContainer reservedContainer, boolean needToUnreserve,
|
||||
MutableObject allocatedContainer) {
|
||||
if (canAssign(application, priority, node, NodeType.RACK_LOCAL,
|
||||
reservedContainer)) {
|
||||
return assignContainer(clusterResource, node, application, priority,
|
||||
rackLocalResourceRequest, NodeType.RACK_LOCAL, reservedContainer,
|
||||
needToUnreserve);
|
||||
needToUnreserve, allocatedContainer);
|
||||
}
|
||||
|
||||
return Resources.none();
|
||||
}
|
||||
|
||||
private Resource assignOffSwitchContainers(Resource clusterResource,
|
||||
ResourceRequest offSwitchResourceRequest, FiCaSchedulerNode node,
|
||||
FiCaSchedulerApp application, Priority priority,
|
||||
RMContainer reservedContainer, boolean needToUnreserve) {
|
||||
if (canAssign(application, priority, node, NodeType.OFF_SWITCH,
|
||||
private Resource assignOffSwitchContainers(
|
||||
Resource clusterResource, ResourceRequest offSwitchResourceRequest,
|
||||
FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority,
|
||||
RMContainer reservedContainer, boolean needToUnreserve,
|
||||
MutableObject allocatedContainer) {
|
||||
if (canAssign(application, priority, node, NodeType.OFF_SWITCH,
|
||||
reservedContainer)) {
|
||||
return assignContainer(clusterResource, node, application, priority,
|
||||
offSwitchResourceRequest, NodeType.OFF_SWITCH, reservedContainer,
|
||||
needToUnreserve);
|
||||
needToUnreserve, allocatedContainer);
|
||||
}
|
||||
|
||||
return Resources.none();
|
||||
@ -1487,7 +1522,7 @@ public class LeafQueue extends AbstractCSQueue {
|
||||
private Resource assignContainer(Resource clusterResource, FiCaSchedulerNode node,
|
||||
FiCaSchedulerApp application, Priority priority,
|
||||
ResourceRequest request, NodeType type, RMContainer rmContainer,
|
||||
boolean needToUnreserve) {
|
||||
boolean needToUnreserve, MutableObject createdContainer) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("assignContainers: node=" + node.getNodeName()
|
||||
+ " application=" + application.getApplicationId()
|
||||
@ -1592,7 +1627,7 @@ public class LeafQueue extends AbstractCSQueue {
|
||||
" container=" + container +
|
||||
" queue=" + this +
|
||||
" clusterResource=" + clusterResource);
|
||||
|
||||
createdContainer.setValue(allocatedContainer);
|
||||
return container.getResource();
|
||||
} else {
|
||||
// if we are allowed to allocate but this node doesn't have space, reserve it or
|
||||
|
@ -204,18 +204,55 @@ public class AppBlock extends HtmlBlock {
|
||||
table._();
|
||||
div._();
|
||||
|
||||
createContainerLocalityTable(html, attemptMetrics);
|
||||
createResourceRequestsTable(html, app);
|
||||
}
|
||||
|
||||
/**
 * Renders the container locality section: two headings followed by the
 * "#containerLocality" table.
 *
 * Each table row is one container locality and each column one request
 * locality. Row 0 fills only the first column, row 1 the first two, and
 * row 2 all three; the remaining cells are left empty.
 *
 * @param html block to render into
 * @param attemptMetrics metrics to display; nothing is rendered when null
 */
private void createContainerLocalityTable(Block html,
    RMAppAttemptMetrics attemptMetrics) {
  if (attemptMetrics == null) {
    return;
  }

  DIV<Hamlet> div = html.div(_INFO_WRAP);

  String totalHeading =
      "Total Allocated Containers: "
          + attemptMetrics.getTotalAllocatedContainers();
  String legend =
      "Each table cell"
          + " represents the number of NodeLocal/RackLocal/OffSwitch containers"
          + " satisfied by NodeLocal/RackLocal/OffSwitch resource requests.";
  TABLE<DIV<Hamlet>> table =
      div.h3(totalHeading).h3(legend).table("#containerLocality");

  // Header row: an empty corner cell, then one column per request locality.
  table.tr()
      .th(_TH, "")
      .th(_TH, "Node Local Request")
      .th(_TH, "Rack Local Request")
      .th(_TH, "Off Switch Request")
      ._();

  String[] rowLabels = {
      "Num Node Local Containers (satisfied by)",
      "Num Rack Local Containers (satisfied by)",
      "Num Off Switch Containers (satisfied by)" };

  int[][] stats = attemptMetrics.getLocalityStatistics();
  for (int row = 0; row < stats.length; row++) {
    String nodeLocalCell = String.valueOf(stats[row][0]);
    String rackLocalCell = (row == 0) ? "" : String.valueOf(stats[row][1]);
    String offSwitchCell = (row <= 1) ? "" : String.valueOf(stats[row][2]);

    // Rows alternate styles, starting with the odd style for the first row.
    table.tr((row % 2 == 0) ? _ODD : _EVEN)
        .td(rowLabels[row])
        .td(nodeLocalCell)
        .td(rackLocalCell)
        .td(offSwitchCell)
        ._();
  }

  table._();
  div._();
}
|
||||
|
||||
private void createResourceRequestsTable(Block html, AppInfo app) {
|
||||
TBODY<TABLE<Hamlet>> tbody =
|
||||
html.table("#ResourceRequests").thead().tr()
|
||||
.th(".priority", "Priority")
|
||||
.th(".resourceName", "ResourceName")
|
||||
.th(".resourceName", "Resource Name")
|
||||
.th(".totalResource", "Capability")
|
||||
.th(".numContainers", "NumContainers")
|
||||
.th(".relaxLocality", "RelaxLocality")
|
||||
.th(".nodeLabelExpression", "NodeLabelExpression")._()._().tbody();
|
||||
.th(".numContainers", "Num Containers")
|
||||
.th(".relaxLocality", "Relax Locality")
|
||||
.th(".nodeLabelExpression", "Node Label Expression")._()._().tbody();
|
||||
|
||||
Resource totalResource = Resource.newInstance(0, 0);
|
||||
if (app.getResourceRequests() != null) {
|
||||
|
@ -52,6 +52,7 @@ import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
||||
@ -218,6 +219,7 @@ public class TestReservations {
|
||||
.getMockApplicationAttemptId(0, 0);
|
||||
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
|
||||
mock(ActiveUsersManager.class), spyRMContext);
|
||||
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
|
||||
|
||||
a.submitApplicationAttempt(app_0, user_0);
|
||||
|
||||
@ -373,6 +375,7 @@ public class TestReservations {
|
||||
.getMockApplicationAttemptId(0, 0);
|
||||
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
|
||||
mock(ActiveUsersManager.class), spyRMContext);
|
||||
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
|
||||
|
||||
a.submitApplicationAttempt(app_0, user_0);
|
||||
|
||||
@ -524,6 +527,7 @@ public class TestReservations {
|
||||
.getMockApplicationAttemptId(0, 0);
|
||||
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
|
||||
mock(ActiveUsersManager.class), spyRMContext);
|
||||
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
|
||||
|
||||
a.submitApplicationAttempt(app_0, user_0);
|
||||
|
||||
@ -765,6 +769,7 @@ public class TestReservations {
|
||||
.getMockApplicationAttemptId(0, 0);
|
||||
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
|
||||
mock(ActiveUsersManager.class), spyRMContext);
|
||||
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
|
||||
|
||||
a.submitApplicationAttempt(app_0, user_0);
|
||||
|
||||
@ -943,7 +948,7 @@ public class TestReservations {
|
||||
.getMockApplicationAttemptId(0, 0);
|
||||
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
|
||||
mock(ActiveUsersManager.class), spyRMContext);
|
||||
|
||||
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
|
||||
a.submitApplicationAttempt(app_0, user_0);
|
||||
|
||||
final ApplicationAttemptId appAttemptId_1 = TestUtils
|
||||
@ -1073,6 +1078,7 @@ public class TestReservations {
|
||||
.getMockApplicationAttemptId(0, 0);
|
||||
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
|
||||
mock(ActiveUsersManager.class), spyRMContext);
|
||||
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
|
||||
|
||||
a.submitApplicationAttempt(app_0, user_0);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user