From eeda6891e49bc13ae86d0193f94238b7109e291d Mon Sep 17 00:00:00 2001 From: Devaraj K Date: Mon, 25 Mar 2019 13:22:53 -0700 Subject: [PATCH] YARN-9268. General improvements in FpgaDevice. Contributed by Peter Bacsko. --- .../resources/fpga/FpgaResourceAllocator.java | 137 +++++++----------- .../fpga/AoclDiagnosticOutputParser.java | 4 +- .../resourceplugin/fpga/FpgaDiscoverer.java | 2 +- .../fpga/discovery/DeviceSpecParser.java | 7 +- .../fpga/TestFpgaResourceHandler.java | 32 ++-- .../fpga/TestAoclOutputParser.java | 24 +-- .../fpga/TestFpgaDiscoverer.java | 17 +-- 7 files changed, 90 insertions(+), 133 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/FpgaResourceAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/FpgaResourceAllocator.java index e5622f9222..b64ffd04d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/FpgaResourceAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/FpgaResourceAllocator.java @@ -21,6 +21,7 @@ import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,7 +52,7 @@ public class FpgaResourceAllocator { //key is resource type of FPGA, vendor plugin supported ID private LinkedHashMap> availableFpga = new LinkedHashMap<>(); - //key is requetor, aka. container ID + //key is requestor, aka. container ID private LinkedHashMap> usedFpgaByRequestor = new LinkedHashMap<>(); private Context nmContext; @@ -133,35 +134,33 @@ public String toString() { } } - public static class FpgaDevice implements Comparable, Serializable { + /** A class that represents an FPGA card. */ + public static class FpgaDevice implements Serializable { + private static final long serialVersionUID = -4678487141824092751L; + private final String type; + private final int major; + private final int minor; - private static final long serialVersionUID = 1L; - - private String type; - private Integer major; - private Integer minor; - // IP file identifier. matrix multiplication for instance - private String IPID; - // SHA-256 hash of the uploaded aocx file - private String aocxHash; - // the device name under /dev - private String devName; // the alias device name. Intel use acl number acl0 to acl31 - private String aliasDevName; - // lspci output's bus number: 02:00.00 (bus:slot.func) - private String busNum; - private String temperature; - private String cardPowerUsage; + private final String aliasDevName; + + // IP file identifier. matrix multiplication for instance (mutable) + private String IPID; + // SHA-256 hash of the uploaded aocx file (mutable) + private String aocxHash; + + // cached hash value + private Integer hashCode; public String getType() { return type; } - public Integer getMajor() { + public int getMajor() { return major; } - public Integer getMinor() { + public int getMinor() { return minor; } @@ -181,57 +180,16 @@ public void setIPID(String IPID) { this.IPID = IPID; } - public String getDevName() { - return devName; - } - - public void setDevName(String devName) { - this.devName = devName; - } - public String getAliasDevName() { return aliasDevName; } - public void setAliasDevName(String aliasDevName) { - this.aliasDevName = aliasDevName; - } - - public String getBusNum() { - return busNum; - } - - public void setBusNum(String busNum) { - this.busNum = busNum; - } - - public String getTemperature() { - return temperature; - } - - public String getCardPowerUsage() { - return cardPowerUsage; - } - - public FpgaDevice(String type, Integer major, Integer minor, String IPID) { - this.type = type; + public FpgaDevice(String type, int major, int minor, String aliasDevName) { + this.type = Preconditions.checkNotNull(type, "type must not be null"); this.major = major; this.minor = minor; - this.IPID = IPID; - } - - public FpgaDevice(String type, Integer major, - Integer minor, String IPID, String devName, - String aliasDevName, String busNum, String temperature, String cardPowerUsage) { - this.type = type; - this.major = major; - this.minor = minor; - this.IPID = IPID; - this.devName = devName; - this.aliasDevName = aliasDevName; - this.busNum = busNum; - this.temperature = temperature; - this.cardPowerUsage = cardPowerUsage; + this.aliasDevName = Preconditions.checkNotNull(aliasDevName, + "aliasDevName must not be null"); } @Override @@ -242,31 +200,48 @@ public boolean equals(Object obj) { if (obj == null) { return false; } - if (!(obj instanceof FpgaDevice)) { + if (getClass() != obj.getClass()) { return false; } FpgaDevice other = (FpgaDevice) obj; - if (other.getType().equals(this.type) && - other.getMajor().equals(this.major) && - other.getMinor().equals(this.minor)) { - return true; + if (aliasDevName == null) { + if (other.aliasDevName != null) { + return false; + } + } else if (!aliasDevName.equals(other.aliasDevName)) { + return false; } - return false; + if (major != other.major) { + return false; + } + if (minor != other.minor) { + return false; + } + if (type == null) { + if (other.type != null) { + return false; + } + } else if (!type.equals(other.type)) { + return false; + } + return true; } @Override public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((type == null) ? 0 : type.hashCode()); - result = prime * result + ((major == null) ? 0 : major.hashCode()); - result = prime * result + ((minor == null) ? 0 : minor.hashCode()); - return result; - } + if (hashCode == null) { + final int prime = 31; + int result = 1; - @Override - public int compareTo(FpgaDevice o) { - return 0; + result = prime * result + major; + result = prime * result + type.hashCode(); + result = prime * result + minor; + result = prime * result + aliasDevName.hashCode(); + + hashCode = result; + } + + return hashCode; } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AoclDiagnosticOutputParser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AoclDiagnosticOutputParser.java index 6d2f870a7b..a4665c684d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AoclDiagnosticOutputParser.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AoclDiagnosticOutputParser.java @@ -149,8 +149,8 @@ public static List parseDiagnosticOutput( devices.add(new FpgaDevice(fpgaType, Integer.parseInt(mmn[0]), - Integer.parseInt(mmn[1]), null, - fields[0], aliasName, fields[1], fields[2], fields[3])); + Integer.parseInt(mmn[1]), + aliasName)); } else { LOG.warn("Failed to retrieve major/minor number for device"); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/FpgaDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/FpgaDiscoverer.java index ecc29349c5..a0490383ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/FpgaDiscoverer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/FpgaDiscoverer.java @@ -153,7 +153,7 @@ public List discover() // Replace list with a filtered one list = list .stream() - .filter(dev -> minors.contains(dev.getMinor().toString())) + .filter(dev -> minors.contains(String.valueOf(dev.getMinor()))) .collect(Collectors.toList()); // if the count of user configured is still larger than actual diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/discovery/DeviceSpecParser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/discovery/DeviceSpecParser.java index 44f4b6c32a..4e64b6f896 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/discovery/DeviceSpecParser.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/discovery/DeviceSpecParser.java @@ -66,12 +66,7 @@ static List getDevicesFromString(String type, String devices) fpgaDevices.add(new FpgaDevice(type, major, minor, - null, - null, - devName, - null, - null, - null)); + devName)); } catch (NumberFormatException e) { throw new ResourceHandlerException( "Cannot parse major/minor number: " + deviceSpec); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/TestFpgaResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/TestFpgaResourceHandler.java index 1660b2e635..9564c7221f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/TestFpgaResourceHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/TestFpgaResourceHandler.java @@ -112,7 +112,7 @@ public void setup() throws IOException { // Assumed devices parsed from output deviceList = new ArrayList<>(); for (int i = 0; i < 5; i++) { - deviceList.add(new FpgaDevice(vendorType, 247, i, null)); + deviceList.add(new FpgaDevice(vendorType, 247, i, "acl" + i)); } String aocxPath = getTestParentFolder() + "/test.aocx"; mockVendorPlugin = mockPlugin(vendorType, deviceList, aocxPath); @@ -163,11 +163,11 @@ public void testBootstrap() throws ResourceHandlerException { for (String s : allowed.split(",")) { boolean check = false; for (FpgaDevice device : allowedDevices) { - if (device.getMinor().toString().equals(s)) { + if (String.valueOf(device.getMinor()).equals(s)) { check = true; } } - Assert.assertTrue("Minor:" + s +"found", check); + Assert.assertTrue("Minor:" + s +" found", check); } Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); @@ -398,10 +398,10 @@ public void testStateStore() public void testReacquireContainer() throws ResourceHandlerException { Container c0 = mockContainer(0, 2, "GEMM"); List assigned = new ArrayList<>(); - assigned.add(new - FpgaDevice(vendorType, 247, 0, null)); - assigned.add(new - FpgaDevice(vendorType, 247, 1, null)); + assigned.add(new FpgaDevice( + vendorType, 247, 0, "acl0")); + assigned.add(new FpgaDevice( + vendorType, 247, 1, "acl1")); // Mock we've stored the c0 states mockStateStoreForContainer(c0, assigned); // NM start @@ -419,10 +419,10 @@ public void testReacquireContainer() throws ResourceHandlerException { getUsedFpga().get(getContainerId(0).toString()); int count = 0; for (FpgaDevice device : used) { - if (device.getMinor().equals(0)){ + if (device.getMinor() == 0){ count++; } - if (device.getMinor().equals(1)) { + if (device.getMinor() == 1) { count++; } } @@ -434,7 +434,7 @@ public void testReacquireContainer() throws ResourceHandlerException { .get(vendorType); count = 0; for (FpgaDevice device : available) { - if (device.getMinor().equals(2)) { + if (device.getMinor() == 2) { count++; } } @@ -445,8 +445,8 @@ public void testReacquireContainer() throws ResourceHandlerException { // Case 2. Recover a not allowed device with minor number 5 Container c1 = mockContainer(1, 1, "GEMM"); assigned = new ArrayList<>(); - assigned.add(new - FpgaDevice(vendorType, 247, 5, null)); + assigned.add(new FpgaDevice( + vendorType, 247, 5, "acl0")); // Mock we've stored the c1 states mockStateStoreForContainer(c1, assigned); boolean flag = false; @@ -464,8 +464,8 @@ public void testReacquireContainer() throws ResourceHandlerException { // Case 3. recover a already used device by other container Container c2 = mockContainer(2, 1, "GEMM"); assigned = new ArrayList<>(); - assigned.add(new - FpgaDevice(vendorType, 247, 1, null)); + assigned.add(new FpgaDevice( + vendorType, 247, 1, "acl0")); // Mock we've stored the c2 states mockStateStoreForContainer(c2, assigned); flag = false; @@ -483,8 +483,8 @@ public void testReacquireContainer() throws ResourceHandlerException { // Case 4. recover a normal container c3 with remaining minor device number 2 Container c3 = mockContainer(3, 1, "GEMM"); assigned = new ArrayList<>(); - assigned.add(new - FpgaDevice(vendorType, 247, 2, null)); + assigned.add(new FpgaDevice( + vendorType, 247, 2, "acl2")); // Mock we've stored the c2 states mockStateStoreForContainer(c3, assigned); fpgaResourceHandler.reacquireContainer(getContainerId(3)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestAoclOutputParser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestAoclOutputParser.java index c83e6b14aa..76a75a8bfd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestAoclOutputParser.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestAoclOutputParser.java @@ -83,31 +83,19 @@ public void testParsing() { assertEquals(3, devices.size()); assertEquals("IntelOpenCL", devices.get(0).getType()); - assertEquals("247", devices.get(0).getMajor().toString()); - assertEquals("0", devices.get(0).getMinor().toString()); + assertEquals(247, devices.get(0).getMajor()); + assertEquals(0, devices.get(0).getMinor()); assertEquals("acl0", devices.get(0).getAliasDevName()); - assertEquals("aclnalla_pcie0", devices.get(0).getDevName()); - assertEquals("02:00.00", devices.get(0).getBusNum()); - assertEquals("53.1 degrees C", devices.get(0).getTemperature()); - assertEquals("31.7 Watts", devices.get(0).getCardPowerUsage()); assertEquals("IntelOpenCL", devices.get(1).getType()); - assertEquals("247", devices.get(1).getMajor().toString()); - assertEquals("1", devices.get(1).getMinor().toString()); + assertEquals(247, devices.get(1).getMajor()); + assertEquals(1, devices.get(1).getMinor()); assertEquals("acl1", devices.get(1).getAliasDevName()); - assertEquals("aclnalla_pcie1", devices.get(1).getDevName()); - assertEquals("03:00.00", devices.get(1).getBusNum()); - assertEquals("43.1 degrees C", devices.get(1).getTemperature()); - assertEquals("11.7 Watts", devices.get(1).getCardPowerUsage()); assertEquals("IntelOpenCL", devices.get(2).getType()); - assertEquals("246", devices.get(2).getMajor().toString()); - assertEquals("0", devices.get(2).getMinor().toString()); + assertEquals(246, devices.get(2).getMajor()); + assertEquals(0, devices.get(2).getMinor()); assertEquals("acl2", devices.get(2).getAliasDevName()); - assertEquals("acla10_ref0", devices.get(2).getDevName()); - assertEquals("09:00.00", devices.get(2).getBusNum()); - assertEquals("50.5781 degrees C", devices.get(2).getTemperature()); - assertEquals("", devices.get(2).getCardPowerUsage()); // Case 2. check alias map assertEquals("acl0", devices.get(0).getAliasDevName()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestFpgaDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestFpgaDiscoverer.java index 870661025d..1eb243132e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestFpgaDiscoverer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestFpgaDiscoverer.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga; - import static org.junit.Assert.assertEquals; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyString; @@ -175,12 +174,12 @@ public void testDiscoveryWhenAvailableDevicesDefined() FpgaDevice device1 = devices.get(1); assertEquals("Device id", "acl0", device0.getAliasDevName()); - assertEquals("Minor number", new Integer(0), device0.getMinor()); - assertEquals("Major", new Integer(243), device0.getMajor()); + assertEquals("Minor number", 0, device0.getMinor()); + assertEquals("Major", 243, device0.getMajor()); assertEquals("Device id", "acl1", device1.getAliasDevName()); - assertEquals("Minor number", new Integer(1), device1.getMinor()); - assertEquals("Major", new Integer(244), device1.getMajor()); + assertEquals("Minor number", 1, device1.getMinor()); + assertEquals("Major", 244, device1.getMajor()); } @Test @@ -245,12 +244,12 @@ public void testDiscoveryWhenExternalScriptDefined() FpgaDevice device1 = devices.get(1); assertEquals("Device id", "acl0", device0.getAliasDevName()); - assertEquals("Minor number", new Integer(0), device0.getMinor()); - assertEquals("Major", new Integer(243), device0.getMajor()); + assertEquals("Minor number", 0, device0.getMinor()); + assertEquals("Major", 243, device0.getMajor()); assertEquals("Device id", "acl1", device1.getAliasDevName()); - assertEquals("Minor number", new Integer(1), device1.getMinor()); - assertEquals("Major", new Integer(244), device1.getMajor()); + assertEquals("Minor number", 1, device1.getMinor()); + assertEquals("Major", 244, device1.getMajor()); } @Test