YARN-3262. Surface application outstanding resource requests table in RM web UI. (Jian He via wangda)
This commit is contained in:
parent
cf51ff2fe8
commit
edcecedc1c
@ -336,6 +336,9 @@ Release 2.7.0 - UNRELEASED
|
||||
YARN-2820. Retry in FileSystemRMStateStore when FS's operations fail
|
||||
due to IOException. (Zhihai Xu via ozawa)
|
||||
|
||||
YARN-3262. Surface application outstanding resource requests table
|
||||
in RM web UI. (Jian He via wangda)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
YARN-2990. FairScheduler's delay-scheduling always waits for node-local and
|
||||
|
@ -140,13 +140,13 @@ public void setCapability(Resource capability) {
|
||||
this.capability = capability;
|
||||
}
|
||||
@Override
|
||||
public int getNumContainers() {
|
||||
public synchronized int getNumContainers() {
|
||||
ResourceRequestProtoOrBuilder p = viaProto ? proto : builder;
|
||||
return (p.getNumContainers());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNumContainers(int numContainers) {
|
||||
public synchronized void setNumContainers(int numContainers) {
|
||||
maybeInitBuilder();
|
||||
builder.setNumContainers((numContainers));
|
||||
}
|
||||
|
@ -658,4 +658,13 @@ protected void refreshMaximumAllocation(Resource newMaxAlloc) {
|
||||
maxAllocWriteLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public List<ResourceRequest> getPendingResourceRequestsForAttempt(
|
||||
ApplicationAttemptId attemptId) {
|
||||
SchedulerApplicationAttempt attempt = getApplicationAttempt(attemptId);
|
||||
if (attempt != null) {
|
||||
return attempt.getAppSchedulingInfo().getAllResourceRequests();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -20,12 +20,14 @@
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
@ -64,7 +66,7 @@ public class AppSchedulingInfo {
|
||||
final Set<Priority> priorities = new TreeSet<Priority>(
|
||||
new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator());
|
||||
final Map<Priority, Map<String, ResourceRequest>> requests =
|
||||
new HashMap<Priority, Map<String, ResourceRequest>>();
|
||||
new ConcurrentHashMap<Priority, Map<String, ResourceRequest>>();
|
||||
private Set<String> blacklist = new HashSet<String>();
|
||||
|
||||
//private final ApplicationStore store;
|
||||
@ -159,7 +161,7 @@ synchronized public void updateResourceRequests(
|
||||
Map<String, ResourceRequest> asks = this.requests.get(priority);
|
||||
|
||||
if (asks == null) {
|
||||
asks = new HashMap<String, ResourceRequest>();
|
||||
asks = new ConcurrentHashMap<String, ResourceRequest>();
|
||||
this.requests.put(priority, asks);
|
||||
this.priorities.add(priority);
|
||||
}
|
||||
@ -221,7 +223,7 @@ synchronized public Map<String, ResourceRequest> getResourceRequests(
|
||||
return requests.get(priority);
|
||||
}
|
||||
|
||||
synchronized public List<ResourceRequest> getAllResourceRequests() {
|
||||
public List<ResourceRequest> getAllResourceRequests() {
|
||||
List<ResourceRequest> ret = new ArrayList<ResourceRequest>();
|
||||
for (Map<String, ResourceRequest> r : requests.values()) {
|
||||
ret.addAll(r.values());
|
||||
@ -300,17 +302,11 @@ synchronized private void allocateNodeLocal(SchedulerNode node,
|
||||
Priority priority, ResourceRequest nodeLocalRequest, Container container,
|
||||
List<ResourceRequest> resourceRequests) {
|
||||
// Update future requirements
|
||||
nodeLocalRequest.setNumContainers(nodeLocalRequest.getNumContainers() - 1);
|
||||
if (nodeLocalRequest.getNumContainers() == 0) {
|
||||
this.requests.get(priority).remove(node.getNodeName());
|
||||
}
|
||||
decResourceRequest(node.getNodeName(), priority, nodeLocalRequest);
|
||||
|
||||
ResourceRequest rackLocalRequest = requests.get(priority).get(
|
||||
node.getRackName());
|
||||
rackLocalRequest.setNumContainers(rackLocalRequest.getNumContainers() - 1);
|
||||
if (rackLocalRequest.getNumContainers() == 0) {
|
||||
this.requests.get(priority).remove(node.getRackName());
|
||||
}
|
||||
decResourceRequest(node.getRackName(), priority, rackLocalRequest);
|
||||
|
||||
ResourceRequest offRackRequest = requests.get(priority).get(
|
||||
ResourceRequest.ANY);
|
||||
@ -322,6 +318,14 @@ synchronized private void allocateNodeLocal(SchedulerNode node,
|
||||
resourceRequests.add(cloneResourceRequest(offRackRequest));
|
||||
}
|
||||
|
||||
private void decResourceRequest(String resourceName, Priority priority,
|
||||
ResourceRequest request) {
|
||||
request.setNumContainers(request.getNumContainers() - 1);
|
||||
if (request.getNumContainers() == 0) {
|
||||
requests.get(priority).remove(resourceName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The {@link ResourceScheduler} is allocating data-local resources to the
|
||||
* application.
|
||||
@ -333,11 +337,8 @@ synchronized private void allocateRackLocal(SchedulerNode node,
|
||||
Priority priority, ResourceRequest rackLocalRequest, Container container,
|
||||
List<ResourceRequest> resourceRequests) {
|
||||
// Update future requirements
|
||||
rackLocalRequest.setNumContainers(rackLocalRequest.getNumContainers() - 1);
|
||||
if (rackLocalRequest.getNumContainers() == 0) {
|
||||
this.requests.get(priority).remove(node.getRackName());
|
||||
}
|
||||
|
||||
decResourceRequest(node.getRackName(), priority, rackLocalRequest);
|
||||
|
||||
ResourceRequest offRackRequest = requests.get(priority).get(
|
||||
ResourceRequest.ANY);
|
||||
decrementOutstanding(offRackRequest);
|
||||
|
@ -153,7 +153,11 @@ public SchedulerApplicationAttempt(ApplicationAttemptId applicationAttemptId,
|
||||
public synchronized Collection<RMContainer> getLiveContainers() {
|
||||
return new ArrayList<RMContainer>(liveContainers.values());
|
||||
}
|
||||
|
||||
|
||||
public AppSchedulingInfo getAppSchedulingInfo() {
|
||||
return this.appSchedulingInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this application pending?
|
||||
* @return true if it is else false.
|
||||
|
@ -35,6 +35,7 @@
|
||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||
import org.apache.hadoop.yarn.api.records.QueueACL;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||
@ -50,6 +51,7 @@
|
||||
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
|
||||
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
|
||||
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
|
||||
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY;
|
||||
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
|
||||
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
|
||||
import org.apache.hadoop.yarn.webapp.view.InfoBlock;
|
||||
@ -90,7 +92,8 @@ protected void render(Block html) {
|
||||
puts("Application not found: "+ aid);
|
||||
return;
|
||||
}
|
||||
AppInfo app = new AppInfo(rmApp, true, WebAppUtils.getHttpSchemePrefix(conf));
|
||||
AppInfo app =
|
||||
new AppInfo(rm, rmApp, true, WebAppUtils.getHttpSchemePrefix(conf));
|
||||
|
||||
// Check for the authorization.
|
||||
String remoteUser = request().getRemoteUser();
|
||||
@ -134,7 +137,7 @@ protected void render(Block html) {
|
||||
._("Application Type:", app.getApplicationType())
|
||||
._("Application Tags:", app.getApplicationTags())
|
||||
._("YarnApplicationState:", clarifyAppState(app.getState()))
|
||||
._("FinalStatus reported by AM:",
|
||||
._("FinalStatus Reported by AM:",
|
||||
clairfyAppFinalStatus(app.getFinalStatus()))
|
||||
._("Started:", Times.format(app.getStartTime()))
|
||||
._("Elapsed:",
|
||||
@ -200,6 +203,45 @@ protected void render(Block html) {
|
||||
|
||||
table._();
|
||||
div._();
|
||||
|
||||
createResourceRequestsTable(html, app);
|
||||
}
|
||||
|
||||
private void createResourceRequestsTable(Block html, AppInfo app) {
|
||||
TBODY<TABLE<Hamlet>> tbody =
|
||||
html.table("#ResourceRequests").thead().tr()
|
||||
.th(".priority", "Priority")
|
||||
.th(".resourceName", "ResourceName")
|
||||
.th(".totalResource", "Capability")
|
||||
.th(".numContainers", "NumContainers")
|
||||
.th(".relaxLocality", "RelaxLocality")
|
||||
.th(".nodeLabelExpression", "NodeLabelExpression")._()._().tbody();
|
||||
|
||||
Resource totalResource = Resource.newInstance(0, 0);
|
||||
if (app.getResourceRequests() != null) {
|
||||
for (ResourceRequest request : app.getResourceRequests()) {
|
||||
if (request.getNumContainers() == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
tbody.tr()
|
||||
.td(String.valueOf(request.getPriority()))
|
||||
.td(request.getResourceName())
|
||||
.td(String.valueOf(request.getCapability()))
|
||||
.td(String.valueOf(request.getNumContainers()))
|
||||
.td(String.valueOf(request.getRelaxLocality()))
|
||||
.td(request.getNodeLabelExpression() == null ? "N/A" : request
|
||||
.getNodeLabelExpression())._();
|
||||
if (request.getResourceName().equals(ResourceRequest.ANY)) {
|
||||
Resources.addTo(totalResource,
|
||||
Resources.multiply(request.getCapability(),
|
||||
request.getNumContainers()));
|
||||
}
|
||||
}
|
||||
}
|
||||
html.div().$class("totalResourceRequests")
|
||||
.h3("Total Outstanding Resource Requests: " + totalResource)._();
|
||||
tbody._()._();
|
||||
}
|
||||
|
||||
private String clarifyAppState(YarnApplicationState state) {
|
||||
|
@ -18,12 +18,16 @@
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
|
||||
|
||||
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES_ID;
|
||||
|
||||
import org.apache.hadoop.yarn.webapp.SubView;
|
||||
|
||||
public class AppPage extends RmView {
|
||||
|
||||
@Override protected void preHead(Page.HTML<_> html) {
|
||||
commonPreHead(html);
|
||||
set(DATATABLES_ID, "ResourceRequests");
|
||||
setTableStyles(html, "ResourceRequests");
|
||||
}
|
||||
|
||||
@Override protected Class<? extends SubView> content() {
|
||||
|
@ -46,12 +46,13 @@
|
||||
class AppsBlock extends HtmlBlock {
|
||||
final ConcurrentMap<ApplicationId, RMApp> apps;
|
||||
private final Configuration conf;
|
||||
|
||||
final ResourceManager rm;
|
||||
@Inject
|
||||
AppsBlock(ResourceManager rm, ViewContext ctx, Configuration conf) {
|
||||
super(ctx);
|
||||
apps = rm.getRMContext().getRMApps();
|
||||
this.conf = conf;
|
||||
this.rm = rm;
|
||||
}
|
||||
|
||||
@Override public void render(Block html) {
|
||||
@ -85,7 +86,7 @@ class AppsBlock extends HtmlBlock {
|
||||
if (reqAppStates != null && !reqAppStates.contains(app.createApplicationState())) {
|
||||
continue;
|
||||
}
|
||||
AppInfo appInfo = new AppInfo(app, true, WebAppUtils.getHttpSchemePrefix(conf));
|
||||
AppInfo appInfo = new AppInfo(rm, app, true, WebAppUtils.getHttpSchemePrefix(conf));
|
||||
String percent = String.format("%.1f", appInfo.getProgress());
|
||||
//AppID numerical value parsed by parseHadoopID in yarn.dt.plugins.js
|
||||
appsTableData.append("[\"<a href='")
|
||||
|
@ -56,7 +56,7 @@ public class FairSchedulerAppsBlock extends HtmlBlock {
|
||||
final ConcurrentMap<ApplicationId, RMApp> apps;
|
||||
final FairSchedulerInfo fsinfo;
|
||||
final Configuration conf;
|
||||
|
||||
final ResourceManager rm;
|
||||
@Inject
|
||||
public FairSchedulerAppsBlock(ResourceManager rm, ViewContext ctx,
|
||||
Configuration conf) {
|
||||
@ -73,6 +73,7 @@ public FairSchedulerAppsBlock(ResourceManager rm, ViewContext ctx,
|
||||
}
|
||||
}
|
||||
this.conf = conf;
|
||||
this.rm = rm;
|
||||
}
|
||||
|
||||
@Override public void render(Block html) {
|
||||
@ -107,7 +108,7 @@ public FairSchedulerAppsBlock(ResourceManager rm, ViewContext ctx,
|
||||
if (reqAppStates != null && !reqAppStates.contains(app.createApplicationState())) {
|
||||
continue;
|
||||
}
|
||||
AppInfo appInfo = new AppInfo(app, true, WebAppUtils.getHttpSchemePrefix(conf));
|
||||
AppInfo appInfo = new AppInfo(rm, app, true, WebAppUtils.getHttpSchemePrefix(conf));
|
||||
String percent = String.format("%.1f", appInfo.getProgress());
|
||||
ApplicationAttemptId attemptId = app.getCurrentAppAttempt().getAppAttemptId();
|
||||
int fairShare = fsinfo.getAppFairShare(attemptId);
|
||||
|
@ -476,8 +476,8 @@ public AppsInfo getApps(@Context HttpServletRequest hsr,
|
||||
}
|
||||
}
|
||||
|
||||
AppInfo app = new AppInfo(rmapp, hasAccess(rmapp, hsr),
|
||||
WebAppUtils.getHttpSchemePrefix(conf));
|
||||
AppInfo app = new AppInfo(rm, rmapp,
|
||||
hasAccess(rmapp, hsr), WebAppUtils.getHttpSchemePrefix(conf));
|
||||
allApps.add(app);
|
||||
}
|
||||
return allApps;
|
||||
@ -617,7 +617,7 @@ public AppInfo getApp(@Context HttpServletRequest hsr,
|
||||
if (app == null) {
|
||||
throw new NotFoundException("app with id: " + appId + " not found");
|
||||
}
|
||||
return new AppInfo(app, hasAccess(app, hsr), hsr.getScheme() + "://");
|
||||
return new AppInfo(rm, app, hasAccess(app, hsr), hsr.getScheme() + "://");
|
||||
}
|
||||
|
||||
@GET
|
||||
|
@ -17,6 +17,8 @@
|
||||
*/
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.bind.annotation.XmlAccessType;
|
||||
import javax.xml.bind.annotation.XmlAccessorType;
|
||||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
@ -27,11 +29,13 @@
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
import org.apache.hadoop.yarn.util.Times;
|
||||
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
|
||||
@ -88,10 +92,14 @@ public class AppInfo {
|
||||
protected int numNonAMContainerPreempted;
|
||||
protected int numAMContainerPreempted;
|
||||
|
||||
protected List<ResourceRequest> resourceRequests;
|
||||
|
||||
public AppInfo() {
|
||||
} // JAXB needs this
|
||||
|
||||
public AppInfo(RMApp app, Boolean hasAccess, String schemePrefix) {
|
||||
@SuppressWarnings({ "rawtypes", "unchecked" })
|
||||
public AppInfo(ResourceManager rm, RMApp app, Boolean hasAccess,
|
||||
String schemePrefix) {
|
||||
this.schemePrefix = schemePrefix;
|
||||
if (app != null) {
|
||||
String trackingUrl = app.getTrackingUrl();
|
||||
@ -154,6 +162,9 @@ public AppInfo(RMApp app, Boolean hasAccess, String schemePrefix) {
|
||||
allocatedVCores = usedResources.getVirtualCores();
|
||||
runningContainers = resourceReport.getNumUsedContainers();
|
||||
}
|
||||
resourceRequests =
|
||||
((AbstractYarnScheduler) rm.getRMContext().getScheduler())
|
||||
.getPendingResourceRequestsForAttempt(attempt.getAppAttemptId());
|
||||
}
|
||||
}
|
||||
|
||||
@ -299,4 +310,8 @@ public long getMemorySeconds() {
|
||||
public long getVcoreSeconds() {
|
||||
return vcoreSeconds;
|
||||
}
|
||||
|
||||
public List<ResourceRequest> getResourceRequests() {
|
||||
return this.resourceRequests;
|
||||
}
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
import com.google.inject.Binder;
|
||||
import com.google.inject.Injector;
|
||||
import com.google.inject.Module;
|
||||
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
@ -35,8 +36,8 @@
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration;
|
||||
@ -149,13 +150,18 @@ public YarnApplicationState createApplicationState() {
|
||||
i++;
|
||||
}
|
||||
|
||||
return new RMContextImpl(null, null, null, null,
|
||||
RMContextImpl rmContext = new RMContextImpl(null, null, null, null,
|
||||
null, null, null, null, null, null) {
|
||||
@Override
|
||||
public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
|
||||
return applicationsMaps;
|
||||
}
|
||||
@Override
|
||||
public ResourceScheduler getScheduler() {
|
||||
return mock(AbstractYarnScheduler.class);
|
||||
}
|
||||
};
|
||||
return rmContext;
|
||||
}
|
||||
|
||||
private static ResourceManager mockRm(RMContext rmContext) throws
|
||||
|
@ -1314,8 +1314,7 @@ public void verifyAppsXML(NodeList nodes, RMApp app) throws JSONException,
|
||||
public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException,
|
||||
Exception {
|
||||
|
||||
// 28 because trackingUrl not assigned yet
|
||||
assertEquals("incorrect number of elements", 26, info.length());
|
||||
assertEquals("incorrect number of elements", 27, info.length());
|
||||
|
||||
verifyAppInfoGeneric(app, info.getString("id"), info.getString("user"),
|
||||
info.getString("name"), info.getString("applicationType"),
|
||||
|
Loading…
Reference in New Issue
Block a user