YARN-10251. Show extended resources on legacy RM UI. Contributed by Eric Payne

(cherry picked from commit 64753addba)
This commit is contained in:
Jonathan Hung 2020-08-07 17:43:01 -07:00
parent 2943e6650f
commit 706d14c41d
6 changed files with 104 additions and 57 deletions

View File

@ -61,8 +61,8 @@ private static String getAppsTableColumnDefs(
// Update following line if any column added in RM page before column 11 // Update following line if any column added in RM page before column 11
sb.append("{'sType':'num-ignore-str', ") sb.append("{'sType':'num-ignore-str', ")
.append("'aTargets': [12, 13, 14, 15, 16] },\n"); .append("'aTargets': [12, 13, 14, 15, 16] },\n");
// set progress column index to 19 // set progress column index to 21
progressIndex = "[19]"; progressIndex = "[21]";
} else if (isFairSchedulerPage) { } else if (isFairSchedulerPage) {
// Update following line if any column added in scheduler page before column 11 // Update following line if any column added in scheduler page before column 11
sb.append("{'sType':'num-ignore-str', ") sb.append("{'sType':'num-ignore-str', ")
@ -112,4 +112,4 @@ public static String resourceRequestsTableInit() {
.toString(); .toString();
} }
} }

View File

@ -28,9 +28,12 @@
import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.apache.hadoop.yarn.util.StringHelper; import org.apache.hadoop.yarn.util.StringHelper;
@Public @Public
@ -63,8 +66,10 @@ public class AppInfo {
protected int priority; protected int priority;
private long allocatedCpuVcores; private long allocatedCpuVcores;
private long allocatedMemoryMB; private long allocatedMemoryMB;
private long allocatedGpus;
private long reservedCpuVcores; private long reservedCpuVcores;
private long reservedMemoryMB; private long reservedMemoryMB;
private long reservedGpus;
protected boolean unmanagedApplication; protected boolean unmanagedApplication;
private String appNodeLabelExpression; private String appNodeLabelExpression;
private String amNodeLabelExpression; private String amNodeLabelExpression;
@ -100,24 +105,35 @@ public AppInfo(ApplicationReport app) {
if (app.getPriority() != null) { if (app.getPriority() != null) {
priority = app.getPriority().getPriority(); priority = app.getPriority().getPriority();
} }
if (app.getApplicationResourceUsageReport() != null) { ApplicationResourceUsageReport usageReport =
runningContainers = app.getApplicationResourceUsageReport() app.getApplicationResourceUsageReport();
if (usageReport != null) {
runningContainers = usageReport
.getNumUsedContainers(); .getNumUsedContainers();
if (app.getApplicationResourceUsageReport().getUsedResources() != null) { if (usageReport.getUsedResources() != null) {
allocatedCpuVcores = app.getApplicationResourceUsageReport() allocatedCpuVcores = usageReport
.getUsedResources().getVirtualCores(); .getUsedResources().getVirtualCores();
allocatedMemoryMB = app.getApplicationResourceUsageReport() allocatedMemoryMB = usageReport
.getUsedResources().getMemorySize(); .getUsedResources().getMemorySize();
reservedCpuVcores = app.getApplicationResourceUsageReport() reservedCpuVcores = usageReport
.getReservedResources().getVirtualCores(); .getReservedResources().getVirtualCores();
reservedMemoryMB = app.getApplicationResourceUsageReport() reservedMemoryMB = usageReport
.getReservedResources().getMemorySize(); .getReservedResources().getMemorySize();
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
.get(ResourceInformation.GPU_URI);
allocatedGpus = -1;
reservedGpus = -1;
if (gpuIndex != null) {
allocatedGpus = usageReport.getUsedResources()
.getResourceValue(ResourceInformation.GPU_URI);
reservedGpus = usageReport.getReservedResources()
.getResourceValue(ResourceInformation.GPU_URI);
}
} }
aggregateResourceAllocation = StringHelper.getResourceSecondsString( aggregateResourceAllocation = StringHelper.getResourceSecondsString(
app.getApplicationResourceUsageReport().getResourceSecondsMap()); usageReport.getResourceSecondsMap());
aggregatePreemptedResourceAllocation = StringHelper aggregatePreemptedResourceAllocation = StringHelper
.getResourceSecondsString(app.getApplicationResourceUsageReport() .getResourceSecondsString(usageReport.getPreemptedResourceSecondsMap());
.getPreemptedResourceSecondsMap());
} }
progress = app.getProgress() * 100; // in percent progress = app.getProgress() * 100; // in percent
if (app.getApplicationTags() != null && !app.getApplicationTags().isEmpty()) { if (app.getApplicationTags() != null && !app.getApplicationTags().isEmpty()) {
@ -176,6 +192,10 @@ public long getAllocatedMemoryMB() {
return allocatedMemoryMB; return allocatedMemoryMB;
} }
/**
 * Returns the GPU count recorded as allocated to this application.
 *
 * @return the value of {@code allocatedGpus}; the ApplicationReport-based
 *         constructor sets it to -1 when used resources are reported but the
 *         resource type index has no entry for the GPU URI
 */
public long getAllocatedGpus() {
return allocatedGpus;
}
public long getReservedCpuVcores() { public long getReservedCpuVcores() {
return reservedCpuVcores; return reservedCpuVcores;
} }
@ -184,6 +204,10 @@ public long getReservedMemoryMB() {
return reservedMemoryMB; return reservedMemoryMB;
} }
/**
 * Returns the GPU count recorded as reserved for this application.
 *
 * @return the value of {@code reservedGpus}; the ApplicationReport-based
 *         constructor sets it to -1 when used resources are reported but the
 *         resource type index has no entry for the GPU URI
 */
public long getReservedGpus() {
return reservedGpus;
}
public float getProgress() { public float getProgress() {
return progress; return progress;
} }

View File

@ -19,14 +19,15 @@
package org.apache.hadoop.yarn.server.resourcemanager.webapp; package org.apache.hadoop.yarn.server.resourcemanager.webapp;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo;
import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.DIV; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.DIV;
import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
@ -62,35 +63,34 @@ protected void render(Block html) {
DIV<Hamlet> div = html.div().$class("metrics"); DIV<Hamlet> div = html.div().$class("metrics");
long usedMemoryBytes = 0; Resource usedResources;
long totalMemoryBytes = 0; Resource totalResources;
long reservedMemoryBytes = 0; Resource reservedResources;
long usedVCores = 0; int allocatedContainers;
long totalVCores = 0;
long reservedVCores = 0;
if (clusterMetrics.getCrossPartitionMetricsAvailable()) { if (clusterMetrics.getCrossPartitionMetricsAvailable()) {
ResourceInfo usedAllPartitions = allocatedContainers =
clusterMetrics.getTotalUsedResourcesAcrossPartition(); clusterMetrics.getTotalAllocatedContainersAcrossPartition();
ResourceInfo totalAllPartitions = usedResources =
clusterMetrics.getTotalClusterResourcesAcrossPartition(); clusterMetrics.getTotalUsedResourcesAcrossPartition().getResource();
ResourceInfo reservedAllPartitions = totalResources =
clusterMetrics.getTotalReservedResourcesAcrossPartition(); clusterMetrics.getTotalClusterResourcesAcrossPartition()
usedMemoryBytes = usedAllPartitions.getMemorySize() * BYTES_IN_MB; .getResource();
totalMemoryBytes = totalAllPartitions.getMemorySize() * BYTES_IN_MB; reservedResources =
reservedMemoryBytes = reservedAllPartitions.getMemorySize() * BYTES_IN_MB; clusterMetrics.getTotalReservedResourcesAcrossPartition()
usedVCores = usedAllPartitions.getvCores(); .getResource();
totalVCores = totalAllPartitions.getvCores();
reservedVCores = reservedAllPartitions.getvCores();
// getTotalUsedResourcesAcrossPartition includes reserved resources. // getTotalUsedResourcesAcrossPartition includes reserved resources.
usedMemoryBytes -= reservedMemoryBytes; Resources.subtractFrom(usedResources, reservedResources);
usedVCores -= reservedVCores;
} else { } else {
usedMemoryBytes = clusterMetrics.getAllocatedMB() * BYTES_IN_MB; allocatedContainers = clusterMetrics.getContainersAllocated();
totalMemoryBytes = clusterMetrics.getTotalMB() * BYTES_IN_MB; usedResources = Resource.newInstance(
reservedMemoryBytes = clusterMetrics.getReservedMB() * BYTES_IN_MB; clusterMetrics.getAllocatedMB() * BYTES_IN_MB,
usedVCores = clusterMetrics.getAllocatedVirtualCores(); (int) clusterMetrics.getAllocatedVirtualCores());
totalVCores = clusterMetrics.getTotalVirtualCores(); totalResources = Resource.newInstance(
reservedVCores = clusterMetrics.getReservedVirtualCores(); clusterMetrics.getTotalMB() * BYTES_IN_MB,
(int) clusterMetrics.getTotalVirtualCores());
reservedResources = Resource.newInstance(
clusterMetrics.getReservedMB() * BYTES_IN_MB,
(int) clusterMetrics.getReservedVirtualCores());
} }
div.h3("Cluster Metrics"). div.h3("Cluster Metrics").
@ -102,12 +102,9 @@ protected void render(Block html) {
th().$class("ui-state-default").__("Apps Running").__(). th().$class("ui-state-default").__("Apps Running").__().
th().$class("ui-state-default").__("Apps Completed").__(). th().$class("ui-state-default").__("Apps Completed").__().
th().$class("ui-state-default").__("Containers Running").__(). th().$class("ui-state-default").__("Containers Running").__().
th().$class("ui-state-default").__("Memory Used").__(). th().$class("ui-state-default").__("Used Resources").__().
th().$class("ui-state-default").__("Memory Total").__(). th().$class("ui-state-default").__("Total Resources").__().
th().$class("ui-state-default").__("Memory Reserved").__(). th().$class("ui-state-default").__("Reserved Resources").__().
th().$class("ui-state-default").__("VCores Used").__().
th().$class("ui-state-default").__("VCores Total").__().
th().$class("ui-state-default").__("VCores Reserved").__().
__(). __().
__(). __().
tbody().$class("ui-widget-content"). tbody().$class("ui-widget-content").
@ -121,14 +118,10 @@ protected void render(Block html) {
clusterMetrics.getAppsFailed() + clusterMetrics.getAppsKilled() clusterMetrics.getAppsFailed() + clusterMetrics.getAppsKilled()
) )
). ).
td(String.valueOf( td(String.valueOf(allocatedContainers)).
clusterMetrics.getTotalAllocatedContainersAcrossPartition())). td(usedResources.toString()).
td(StringUtils.byteDesc(usedMemoryBytes)). td(totalResources.toString()).
td(StringUtils.byteDesc(totalMemoryBytes)). td(reservedResources.toString()).
td(StringUtils.byteDesc(reservedMemoryBytes)).
td(String.valueOf(usedVCores)).
td(String.valueOf(totalVCores)).
td(String.valueOf(reservedVCores)).
__(). __().
__().__(); __().__();

View File

@ -22,6 +22,7 @@
import org.apache.commons.text.StringEscapeUtils; import org.apache.commons.text.StringEscapeUtils;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
@ -30,6 +31,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.SubView;
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.TABLE; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.TABLE;
@ -86,7 +88,9 @@ protected void render(Block html) {
.th(".mem", "Mem Used") .th(".mem", "Mem Used")
.th(".mem", "Mem Avail") .th(".mem", "Mem Avail")
.th(".vcores", "VCores Used") .th(".vcores", "VCores Used")
.th(".vcores", "VCores Avail"); .th(".vcores", "VCores Avail")
.th(".gpus", "GPUs Used")
.th(".gpus", "GPUs Avail");
} else { } else {
trbody.th(".containers", "Running Containers (G)") trbody.th(".containers", "Running Containers (G)")
.th(".allocationTags", "Allocation Tags") .th(".allocationTags", "Allocation Tags")
@ -94,6 +98,8 @@ protected void render(Block html) {
.th(".mem", "Mem Avail (G)") .th(".mem", "Mem Avail (G)")
.th(".vcores", "VCores Used (G)") .th(".vcores", "VCores Used (G)")
.th(".vcores", "VCores Avail (G)") .th(".vcores", "VCores Avail (G)")
.th(".gpus", "GPUs Used (G)")
.th(".gpus", "GPUs Avail (G)")
.th(".containers", "Running Containers (O)") .th(".containers", "Running Containers (O)")
.th(".mem", "Mem Used (O)") .th(".mem", "Mem Used (O)")
.th(".vcores", "VCores Used (O)") .th(".vcores", "VCores Used (O)")
@ -165,6 +171,16 @@ protected void render(Block html) {
nodeTableData.append("\",\"<a ").append("href='" + "//" + httpAddress) nodeTableData.append("\",\"<a ").append("href='" + "//" + httpAddress)
.append("'>").append(httpAddress).append("</a>\",").append("\""); .append("'>").append(httpAddress).append("</a>\",").append("\"");
} }
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
.get(ResourceInformation.GPU_URI);
long usedGPUs = 0;
long availableGPUs = 0;
if (gpuIndex != null) {
usedGPUs = info.getUsedResource().getResource()
.getResourceValue(ResourceInformation.GPU_URI);
availableGPUs = info.getAvailableResource().getResource()
.getResourceValue(ResourceInformation.GPU_URI);
}
nodeTableData.append("<br title='") nodeTableData.append("<br title='")
.append(String.valueOf(info.getLastHealthUpdate())).append("'>") .append(String.valueOf(info.getLastHealthUpdate())).append("'>")
.append(Times.format(info.getLastHealthUpdate())).append("\",\"") .append(Times.format(info.getLastHealthUpdate())).append("\",\"")
@ -179,6 +195,10 @@ protected void render(Block html) {
.append("\",\"").append(String.valueOf(info.getUsedVirtualCores())) .append("\",\"").append(String.valueOf(info.getUsedVirtualCores()))
.append("\",\"") .append("\",\"")
.append(String.valueOf(info.getAvailableVirtualCores())) .append(String.valueOf(info.getAvailableVirtualCores()))
.append("\",\"")
.append(String.valueOf(usedGPUs))
.append("\",\"")
.append(String.valueOf(availableGPUs))
.append("\",\""); .append("\",\"");
// If opportunistic containers are enabled, add extra fields. // If opportunistic containers are enabled, add extra fields.

View File

@ -69,8 +69,10 @@ public class RMAppsBlock extends AppsBlock {
new ColumnHeader(".runningcontainer", "Running Containers"), new ColumnHeader(".runningcontainer", "Running Containers"),
new ColumnHeader(".allocatedCpu", "Allocated CPU VCores"), new ColumnHeader(".allocatedCpu", "Allocated CPU VCores"),
new ColumnHeader(".allocatedMemory", "Allocated Memory MB"), new ColumnHeader(".allocatedMemory", "Allocated Memory MB"),
new ColumnHeader(".allocatedGpu", "Allocated GPUs"),
new ColumnHeader(".reservedCpu", "Reserved CPU VCores"), new ColumnHeader(".reservedCpu", "Reserved CPU VCores"),
new ColumnHeader(".reservedMemory", "Reserved Memory MB"), new ColumnHeader(".reservedMemory", "Reserved Memory MB"),
new ColumnHeader(".reservedGpu", "Reserved GPUs"),
new ColumnHeader(".queuePercentage", "% of Queue"), new ColumnHeader(".queuePercentage", "% of Queue"),
new ColumnHeader(".clusterPercentage", "% of Cluster"), new ColumnHeader(".clusterPercentage", "% of Cluster"),
new ColumnHeader(".progress", "Progress"), new ColumnHeader(".progress", "Progress"),
@ -119,6 +121,7 @@ protected void renderData(Block html) {
String blacklistedNodesCount = "N/A"; String blacklistedNodesCount = "N/A";
RMApp rmApp = rm.getRMContext().getRMApps() RMApp rmApp = rm.getRMContext().getRMApps()
.get(appAttemptId.getApplicationId()); .get(appAttemptId.getApplicationId());
boolean isAppInCompletedState = false;
if (rmApp != null) { if (rmApp != null) {
RMAppAttempt appAttempt = rmApp.getRMAppAttempt(appAttemptId); RMAppAttempt appAttempt = rmApp.getRMAppAttempt(appAttemptId);
Set<String> nodes = Set<String> nodes =
@ -126,6 +129,7 @@ protected void renderData(Block html) {
if (nodes != null) { if (nodes != null) {
blacklistedNodesCount = String.valueOf(nodes.size()); blacklistedNodesCount = String.valueOf(nodes.size());
} }
isAppInCompletedState = rmApp.isAppInCompletedStates();
} }
String percent = StringUtils.format("%.1f", app.getProgress()); String percent = StringUtils.format("%.1f", app.getProgress());
appsTableData appsTableData
@ -171,12 +175,18 @@ protected void renderData(Block html) {
.append(app.getAllocatedMemoryMB() == -1 ? "N/A" : .append(app.getAllocatedMemoryMB() == -1 ? "N/A" :
String.valueOf(app.getAllocatedMemoryMB())) String.valueOf(app.getAllocatedMemoryMB()))
.append("\",\"") .append("\",\"")
.append((isAppInCompletedState && app.getAllocatedGpus() <= 0)
? UNAVAILABLE : String.valueOf(app.getAllocatedGpus()))
.append("\",\"")
.append(app.getReservedCpuVcores() == -1 ? "N/A" : String .append(app.getReservedCpuVcores() == -1 ? "N/A" : String
.valueOf(app.getReservedCpuVcores())) .valueOf(app.getReservedCpuVcores()))
.append("\",\"") .append("\",\"")
.append(app.getReservedMemoryMB() == -1 ? "N/A" : .append(app.getReservedMemoryMB() == -1 ? "N/A" :
String.valueOf(app.getReservedMemoryMB())) String.valueOf(app.getReservedMemoryMB()))
.append("\",\"") .append("\",\"")
.append((isAppInCompletedState && app.getReservedGpus() <= 0)
? UNAVAILABLE : String.valueOf(app.getReservedGpus()))
.append("\",\"")
.append(queuePercent) .append(queuePercent)
.append("\",\"") .append("\",\"")
.append(clusterPercent) .append(clusterPercent)

View File

@ -48,8 +48,8 @@ public class TestNodesPage {
// Number of Actual Table Headers for NodesPage.NodesBlock might change in // Number of Actual Table Headers for NodesPage.NodesBlock might change in
// future. In that case this value should be adjusted to the new value. // future. In that case this value should be adjusted to the new value.
private final int numberOfThInMetricsTable = 23; private final int numberOfThInMetricsTable = 20;
private final int numberOfActualTableHeaders = 14; private final int numberOfActualTableHeaders = 16;
private final int numberOfThForOpportunisticContainers = 4; private final int numberOfThForOpportunisticContainers = 4;
private Injector injector; private Injector injector;