From 8e1539eca8175f58f836daa5b0ad598c890cbb02 Mon Sep 17 00:00:00 2001 From: Sunil G Date: Wed, 13 Mar 2019 02:45:17 +0530 Subject: [PATCH] YARN-9266. General improvements in IntelFpgaOpenclPlugin. Contributed by Peter Bacsko. --- .../fpga/FpgaResourceHandlerImpl.java | 55 +-- .../fpga/AbstractFpgaVendorPlugin.java | 17 +- .../fpga/AoclDiagnosticOutputParser.java | 166 +++++++++ .../fpga/IntelFpgaOpenclPlugin.java | 318 ++++++------------ .../resourceplugin/fpga/package-info.java | 19 ++ .../fpga/TestFpgaResourceHandler.java | 214 ++++++++---- .../fpga/TestAoclOutputParser.java | 117 +++++++ .../fpga/TestFpgaDiscoverer.java | 134 ++------ 8 files changed, 619 insertions(+), 421 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AoclDiagnosticOutputParser.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/package-info.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestAoclOutputParser.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/FpgaResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/FpgaResourceHandlerImpl.java index c952906f16..61ffd355d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/FpgaResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/FpgaResourceHandlerImpl.java @@ -25,7 +25,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; @@ -37,13 +36,13 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator.FpgaDevice; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.AbstractFpgaVendorPlugin; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.FpgaDiscoverer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Map; import static org.apache.hadoop.yarn.api.records.ResourceInformation.FPGA_URI; @@ -89,21 +88,25 @@ public String getRequestedIPID(Container container) { } @Override - public List bootstrap(Configuration configuration) throws ResourceHandlerException { + public List bootstrap(Configuration configuration) + throws ResourceHandlerException { // The plugin should be initilized by FpgaDiscoverer already if (!vendorPlugin.initPlugin(configuration)) { - throw new ResourceHandlerException("FPGA plugin initialization failed", null); + throw new ResourceHandlerException("FPGA plugin initialization failed"); } LOG.info("FPGA Plugin bootstrap success."); // Get avialable devices minor numbers from toolchain or static configuration - List fpgaDeviceList = FpgaDiscoverer.getInstance().discover(); + List fpgaDeviceList = + FpgaDiscoverer.getInstance().discover(); allocator.addFpga(vendorPlugin.getFpgaType(), fpgaDeviceList); - this.cGroupsHandler.initializeCGroupController(CGroupsHandler.CGroupController.DEVICES); + this.cGroupsHandler.initializeCGroupController( + CGroupsHandler.CGroupController.DEVICES); return null; } @Override - public List preStart(Container container) throws ResourceHandlerException { + public List preStart(Container container) + throws ResourceHandlerException { // 1. Get requested FPGA type and count, choose corresponding FPGA plugin(s) // 2. Use allocator.assignFpga(type, count) to get FPGAAllocation // 3. If required, download to ensure IP file exists and configure IP file for all devices @@ -126,7 +129,8 @@ public List preStart(Container container) throws ResourceHa container, getRequestedIPID(container)); LOG.info("FpgaAllocation:" + allocation); - PrivilegedOperation privilegedOperation = new PrivilegedOperation(PrivilegedOperation.OperationType.FPGA, + PrivilegedOperation privilegedOperation = + new PrivilegedOperation(PrivilegedOperation.OperationType.FPGA, Arrays.asList(CONTAINER_ID_CLI_OPTION, containerIdStr)); if (!allocation.getDenied().isEmpty()) { List denied = new ArrayList<>(); @@ -134,7 +138,8 @@ public List preStart(Container container) throws ResourceHa privilegedOperation.appendArgs(Arrays.asList(EXCLUDED_FPGAS_CLI_OPTION, StringUtils.join(",", denied))); } - privilegedOperationExecutor.executePrivilegedOperation(privilegedOperation, true); + privilegedOperationExecutor.executePrivilegedOperation( + privilegedOperation, true); if (deviceCount > 0) { /** @@ -152,25 +157,30 @@ public List preStart(Container container) throws ResourceHa * for different devices * * */ - ipFilePath = vendorPlugin.downloadIP(getRequestedIPID(container), container.getWorkDir(), + ipFilePath = vendorPlugin.retrieveIPfilePath( + getRequestedIPID(container), + container.getWorkDir(), container.getResourceSet().getLocalizedResources()); - if (ipFilePath.isEmpty()) { - LOG.warn("FPGA plugin failed to download IP but continue, please check the value of environment viable: " + - REQUEST_FPGA_IP_ID_KEY + " if you want yarn to help"); + if (ipFilePath == null) { + LOG.warn("FPGA plugin failed to downloaded IP, please check the" + + " value of environment viable: " + REQUEST_FPGA_IP_ID_KEY + + " if you want YARN to program the device"); } else { LOG.info("IP file path:" + ipFilePath); List allowed = allocation.getAllowed(); String majorMinorNumber; for (int i = 0; i < allowed.size(); i++) { - majorMinorNumber = allowed.get(i).getMajor() + ":" + allowed.get(i).getMinor(); - String currentIPID = allowed.get(i).getIPID(); + FpgaDevice device = allowed.get(i); + majorMinorNumber = device.getMajor() + ":" + device.getMinor(); + String currentIPID = device.getIPID(); if (null != currentIPID && currentIPID.equalsIgnoreCase(getRequestedIPID(container))) { - LOG.info("IP already in device \"" + allowed.get(i).getAliasDevName() + "," + - majorMinorNumber + "\", skip reprogramming"); + LOG.info("IP already in device \"" + + allowed.get(i).getAliasDevName() + + "," + majorMinorNumber + "\", skip reprogramming"); continue; } - if (vendorPlugin.configureIP(ipFilePath, majorMinorNumber)) { + if (vendorPlugin.configureIP(ipFilePath, device)) { // update the allocator that we update an IP of a device allocator.updateFpga(containerIdStr, allowed.get(i), getRequestedIPID(container)); @@ -186,7 +196,8 @@ public List preStart(Container container) throws ResourceHa throw re; } catch (PrivilegedOperationException e) { allocator.cleanupAssignFpgas(containerIdStr); - cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, containerIdStr); + cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, + containerIdStr); LOG.warn("Could not update cgroup for container", e); throw new ResourceHandlerException(e); } @@ -200,7 +211,8 @@ public List preStart(Container container) throws ResourceHa } @Override - public List reacquireContainer(ContainerId containerId) throws ResourceHandlerException { + public List reacquireContainer(ContainerId containerId) + throws ResourceHandlerException { allocator.recoverAssignedFpgas(containerId); return null; } @@ -212,7 +224,8 @@ public List updateContainer(Container container) } @Override - public List postComplete(ContainerId containerId) throws ResourceHandlerException { + public List postComplete(ContainerId containerId) + throws ResourceHandlerException { allocator.cleanupAssignFpgas(containerId.toString()); cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, containerId.toString()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AbstractFpgaVendorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AbstractFpgaVendorPlugin.java index 4db97291c2..d238c67ca8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AbstractFpgaVendorPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AbstractFpgaVendorPlugin.java @@ -21,10 +21,10 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator.FpgaDevice; import java.util.List; import java.util.Map; @@ -38,7 +38,7 @@ @InterfaceAudience.Private @InterfaceStability.Unstable -public interface AbstractFpgaVendorPlugin extends Configurable{ +public interface AbstractFpgaVendorPlugin { /** * Check vendor's toolchain and required environment @@ -72,19 +72,14 @@ public interface AbstractFpgaVendorPlugin extends Configurable{ * localized file path and value is soft link names * @return The absolute path string of IP file * */ - String downloadIP(String id, String dstDir, Map> localizedResources); + String retrieveIPfilePath(String id, String dstDir, + Map> localizedResources); /** * The vendor plugin configure an IP file to a device * @param ipPath The absolute path of the IP file - * @param majorMinorNumber The device in format <major:minor> + * @param device The FPGA device object * @return configure device ok or not * */ - boolean configureIP(String ipPath, String majorMinorNumber); - - @Override - void setConf(Configuration conf); - - @Override - Configuration getConf(); + boolean configureIP(String ipPath, FpgaDevice device); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AoclDiagnosticOutputParser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AoclDiagnosticOutputParser.java new file mode 100644 index 0000000000..6d2f870a7b --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/AoclDiagnosticOutputParser.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator.FpgaDevice; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.IntelFpgaOpenclPlugin.InnerShellExecutor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +final class AoclDiagnosticOutputParser { + private AoclDiagnosticOutputParser() { + // no instances + } + + private static final Logger LOG = LoggerFactory.getLogger( + AoclDiagnosticOutputParser.class); + + /** + * One real sample output of Intel FPGA SDK 17.0's "aocl diagnose" is as below: + * " + * aocl diagnose: Running diagnose from /home/fpga/intelFPGA_pro/17.0/hld/board/nalla_pcie/linux64/libexec + * + * ------------------------- acl0 ------------------------- + * Vendor: Nallatech ltd + * + * Phys Dev Name Status Information + * + * aclnalla_pcie0Passed nalla_pcie (aclnalla_pcie0) + * PCIe dev_id = 2494, bus:slot.func = 02:00.00, Gen3 x8 + * FPGA temperature = 54.4 degrees C. + * Total Card Power Usage = 31.7 Watts. + * Device Power Usage = 0.0 Watts. + * + * DIAGNOSTIC_PASSED + * --------------------------------------------------------- + * " + * + * While per Intel's guide, the output(should be outdated or prior SDK version's) is as below: + * + * " + * aocl diagnose: Running diagnostic from ALTERAOCLSDKROOT/board/<board_name>/ + * <platform>/libexec + * Verified that the kernel mode driver is installed on the host machine. + * Using board package from vendor: <board_vendor_name> + * Querying information for all supported devices that are installed on the host + * machine ... + * + * device_name Status Information + * + * acl0 Passed <descriptive_board_name> + * PCIe dev_id = <device_ID>, bus:slot.func = 02:00.00, + * at Gen 2 with 8 lanes. + * FPGA temperature=43.0 degrees C. + * acl1 Passed <descriptive_board_name> + * PCIe dev_id = <device_ID>, bus:slot.func = 03:00.00, + * at Gen 2 with 8 lanes. + * FPGA temperature = 35.0 degrees C. + * + * Found 2 active device(s) installed on the host machine, to perform a full + * diagnostic on a specific device, please run aocl diagnose <device_name> + * + * DIAGNOSTIC_PASSED + * " + * But this method only support the first output + * */ + public static List parseDiagnosticOutput( + String output, InnerShellExecutor shellExecutor, String fpgaType) { + if (output.contains("DIAGNOSTIC_PASSED")) { + List devices = new ArrayList<>(); + Matcher headerStartMatcher = Pattern.compile("acl[0-31]") + .matcher(output); + Matcher headerEndMatcher = Pattern.compile("(?i)DIAGNOSTIC_PASSED") + .matcher(output); + int sectionStartIndex; + int sectionEndIndex; + String aliasName; + + while (headerStartMatcher.find()) { + sectionStartIndex = headerStartMatcher.end(); + String section = null; + aliasName = headerStartMatcher.group(); + while (headerEndMatcher.find(sectionStartIndex)) { + sectionEndIndex = headerEndMatcher.start(); + section = output.substring(sectionStartIndex, sectionEndIndex); + break; + } + + if (section == null) { + LOG.warn("Unsupported diagnose output"); + LOG.warn("aocl output is: " + output); + return Collections.emptyList(); + } + + // devName, \(.*\) + // busNum, bus:slot.func\s=\s.*, + // FPGA temperature\s=\s.* + // Total\sCard\sPower\sUsage\s=\s.* + String[] fieldRegexes = new String[]{"\\(.*\\)\n", + "(?i)bus:slot.func\\s=\\s.*,", + "(?i)FPGA temperature\\s=\\s.*", + "(?i)Total\\sCard\\sPower\\sUsage\\s=\\s.*"}; + String[] fields = new String[4]; + String tempFieldValue; + + for (int i = 0; i < fieldRegexes.length; i++) { + Matcher fieldMatcher = Pattern.compile(fieldRegexes[i]) + .matcher(section); + if (!fieldMatcher.find()) { + LOG.warn("Couldn't find " + fieldRegexes[i] + " pattern"); + fields[i] = ""; + continue; + } + tempFieldValue = fieldMatcher.group().trim(); + if (i == 0) { + // special case for Device name + fields[i] = tempFieldValue.substring(1, + tempFieldValue.length() - 1); + } else { + String ss = tempFieldValue.split("=")[1].trim(); + fields[i] = ss.substring(0, ss.length() - 1); + } + } + + String majorMinorNumber = shellExecutor + .getMajorAndMinorNumber(fields[0]); + if (null != majorMinorNumber) { + String[] mmn = majorMinorNumber.split(":"); + + devices.add(new FpgaDevice(fpgaType, + Integer.parseInt(mmn[0]), + Integer.parseInt(mmn[1]), null, + fields[0], aliasName, fields[1], fields[2], fields[3])); + } else { + LOG.warn("Failed to retrieve major/minor number for device"); + } + } + + return devices; + } else { + LOG.warn("The diagnostic has failed"); + LOG.warn("Output of aocl is: " + output); + return Collections.emptyList(); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/IntelFpgaOpenclPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/IntelFpgaOpenclPlugin.java index 1f9cba78fc..33c39ae282 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/IntelFpgaOpenclPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/IntelFpgaOpenclPlugin.java @@ -23,126 +23,118 @@ import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator.FpgaDevice; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; /** * Intel FPGA for OpenCL plugin. * The key points are: - * 1. It uses Intel's toolchain "aocl" to discover devices/reprogram IP to the device - * before container launch to achieve a quickest reprogramming path + * 1. It uses Intel's toolchain "aocl" to discover devices/reprogram IP + * to the device before container launch to achieve a quickest + * reprogramming path * 2. It avoids reprogramming by maintaining a mapping of device to FPGA IP ID * 3. It assume IP file is distributed to container directory */ public class IntelFpgaOpenclPlugin implements AbstractFpgaVendorPlugin { - public static final Logger LOG = LoggerFactory.getLogger( + private static final Logger LOG = LoggerFactory.getLogger( IntelFpgaOpenclPlugin.class); private boolean initialized = false; - private Configuration conf; private InnerShellExecutor shell; - protected static final String DEFAULT_BINARY_NAME = "aocl"; + private static final String DEFAULT_BINARY_NAME = "aocl"; - protected static final String ALTERAOCLSDKROOT_NAME = "ALTERAOCLSDKROOT"; + private static final String ALTERAOCLSDKROOT_NAME = "ALTERAOCLSDKROOT"; + + private Function envProvider = System::getenv; private String pathToExecutable = null; - // a mapping of major:minor number to acl0-31 - private Map aliasMap; + @VisibleForTesting + void setInnerShellExecutor(InnerShellExecutor shellExecutor) { + this.shell = shellExecutor; + } + + @VisibleForTesting + String getPathToExecutable() { + return pathToExecutable; + } + + @VisibleForTesting + void setEnvProvider(Function envProvider) { + this.envProvider = envProvider; + } public IntelFpgaOpenclPlugin() { this.shell = new InnerShellExecutor(); } - public String getDefaultBinaryName() { - return DEFAULT_BINARY_NAME; - } - public String getDefaultPathToExecutable() { - return System.getenv(ALTERAOCLSDKROOT_NAME); - } - - public static String getDefaultPathEnvName() { - return ALTERAOCLSDKROOT_NAME; - } - - @VisibleForTesting - public String getPathToExecutable() { - return pathToExecutable; - } - - public void setPathToExecutable(String pathToExecutable) { - this.pathToExecutable = pathToExecutable; - } - - @VisibleForTesting - public void setShell(InnerShellExecutor shell) { - this.shell = shell; - } - - public Map getAliasMap() { - return aliasMap; + return envProvider.apply(ALTERAOCLSDKROOT_NAME); } /** - * Check the Intel FPGA for OpenCL toolchain + * Check the Intel FPGA for OpenCL toolchain. * */ @Override - public boolean initPlugin(Configuration conf) { - this.aliasMap = new HashMap<>(); - if (this.initialized) { + public boolean initPlugin(Configuration config) { + if (initialized) { return true; } // Find the proper toolchain, mainly aocl - String pluginDefaultBinaryName = getDefaultBinaryName(); - String pathToExecutable = conf.get(YarnConfiguration.NM_FPGA_PATH_TO_EXEC, - ""); - if (pathToExecutable.isEmpty()) { - pathToExecutable = pluginDefaultBinaryName; - } + String pluginDefaultBinaryName = DEFAULT_BINARY_NAME; + String executable = config.get(YarnConfiguration.NM_FPGA_PATH_TO_EXEC, + pluginDefaultBinaryName); + // Validate file existence - File binaryPath = new File(pathToExecutable); + File binaryPath = new File(executable); if (!binaryPath.exists()) { // When binary not exist, fail LOG.warn("Failed to find FPGA discoverer executable configured in " + YarnConfiguration.NM_FPGA_PATH_TO_EXEC + ", please check! Try default path"); - pathToExecutable = pluginDefaultBinaryName; + executable = pluginDefaultBinaryName; // Try to find in plugin's preferred path String pluginDefaultPreferredPath = getDefaultPathToExecutable(); if (null == pluginDefaultPreferredPath) { - LOG.warn("Failed to find FPGA discoverer executable from system environment " + - getDefaultPathEnvName()+ + LOG.warn("Failed to find FPGA discoverer executable from system " + + " environment " + ALTERAOCLSDKROOT_NAME + ", please check your environment!"); } else { - binaryPath = new File(pluginDefaultPreferredPath + "/bin", pluginDefaultBinaryName); + binaryPath = new File(pluginDefaultPreferredPath + "/bin", + pluginDefaultBinaryName); if (binaryPath.exists()) { - pathToExecutable = binaryPath.getAbsolutePath(); + executable = binaryPath.getAbsolutePath(); LOG.info("Succeed in finding FPGA discoverer executable: " + - pathToExecutable); + executable); } else { - pathToExecutable = pluginDefaultBinaryName; + executable = pluginDefaultBinaryName; LOG.warn("Failed to find FPGA discoverer executable in " + - pluginDefaultPreferredPath + ", file doesn't exists! Use default binary" + pathToExecutable); + pluginDefaultPreferredPath + + ", file doesn't exists! Use default binary" + executable); } } } - setPathToExecutable(pathToExecutable); + + pathToExecutable = executable; + if (!diagnose(10*1000)) { LOG.warn("Intel FPGA for OpenCL diagnose failed!"); - this.initialized = false; + initialized = false; } else { - this.initialized = true; + initialized = true; } - return this.initialized; + return initialized; } @Override @@ -153,10 +145,16 @@ public List discover(int timeout) { if (null == output) { return list; } - parseDiagnoseInfo(output, list); + + list = AoclDiagnosticOutputParser.parseDiagnosticOutput(output, + shell, getFpgaType()); + return list; } + /** + * Helper class to run aocl diagnose & determine major/minor numbers. + */ public static class InnerShellExecutor { // ls /dev/ @@ -170,13 +168,13 @@ public String getMajorAndMinorNumber(String devName) { shexec.execute(); String[] strs = shexec.getOutput().trim().split(":"); LOG.debug("stat output:" + shexec.getOutput()); - output = Integer.parseInt(strs[0], 16) + ":" + Integer.parseInt(strs[1], 16); + output = Integer.parseInt(strs[0], 16) + ":" + + Integer.parseInt(strs[1], 16); } catch (IOException e) { - String msg = - "Failed to get major-minor number from reading /dev/" + devName; - LOG.warn(msg); - LOG.debug("Command output:" + shexec.getOutput() + ", exit code:" + - shexec.getExitCode()); + LOG.warn("Failed to get major-minor number from reading /dev/" + + devName); + LOG.warn("Command output:" + shexec.getOutput() + ", exit code: " + + shexec.getExitCode(), e); } return output; } @@ -184,7 +182,7 @@ public String getMajorAndMinorNumber(String devName) { public String runDiagnose(String binary, int timeout) { String output = null; Shell.ShellCommandExecutor shexec = new Shell.ShellCommandExecutor( - new String[]{binary, "diagnose"}); + new String[]{binary, "diagnose"}, null, null, timeout); try { shexec.execute(); } catch (IOException e) { @@ -198,112 +196,6 @@ public String runDiagnose(String binary, int timeout) { } return shexec.getOutput(); } - - } - - /** - * One real sample output of Intel FPGA SDK 17.0's "aocl diagnose" is as below: - * " - * aocl diagnose: Running diagnose from /home/fpga/intelFPGA_pro/17.0/hld/board/nalla_pcie/linux64/libexec - * - * ------------------------- acl0 ------------------------- - * Vendor: Nallatech ltd - * - * Phys Dev Name Status Information - * - * aclnalla_pcie0Passed nalla_pcie (aclnalla_pcie0) - * PCIe dev_id = 2494, bus:slot.func = 02:00.00, Gen3 x8 - * FPGA temperature = 54.4 degrees C. - * Total Card Power Usage = 31.7 Watts. - * Device Power Usage = 0.0 Watts. - * - * DIAGNOSTIC_PASSED - * --------------------------------------------------------- - * " - * - * While per Intel's guide, the output(should be outdated or prior SDK version's) is as below: - * - * " - * aocl diagnose: Running diagnostic from ALTERAOCLSDKROOT/board/<board_name>/ - * <platform>/libexec - * Verified that the kernel mode driver is installed on the host machine. - * Using board package from vendor: <board_vendor_name> - * Querying information for all supported devices that are installed on the host - * machine ... - * - * device_name Status Information - * - * acl0 Passed <descriptive_board_name> - * PCIe dev_id = <device_ID>, bus:slot.func = 02:00.00, - * at Gen 2 with 8 lanes. - * FPGA temperature=43.0 degrees C. - * acl1 Passed <descriptive_board_name> - * PCIe dev_id = <device_ID>, bus:slot.func = 03:00.00, - * at Gen 2 with 8 lanes. - * FPGA temperature = 35.0 degrees C. - * - * Found 2 active device(s) installed on the host machine, to perform a full - * diagnostic on a specific device, please run aocl diagnose <device_name> - * - * DIAGNOSTIC_PASSED - * " - * But this method only support the first output - * */ - public void parseDiagnoseInfo(String output, List list) { - if (output.contains("DIAGNOSTIC_PASSED")) { - Matcher headerStartMatcher = Pattern.compile("acl[0-31]").matcher(output); - Matcher headerEndMatcher = Pattern.compile("(?i)DIAGNOSTIC_PASSED").matcher(output); - int sectionStartIndex; - int sectionEndIndex; - String aliasName; - while (headerStartMatcher.find()) { - sectionStartIndex = headerStartMatcher.end(); - String section = null; - aliasName = headerStartMatcher.group(); - while (headerEndMatcher.find(sectionStartIndex)) { - sectionEndIndex = headerEndMatcher.start(); - section = output.substring(sectionStartIndex, sectionEndIndex); - break; - } - if (null == section) { - LOG.warn("Unsupported diagnose output"); - return; - } - // devName, \(.*\) - // busNum, bus:slot.func\s=\s.*, - // FPGA temperature\s=\s.* - // Total\sCard\sPower\sUsage\s=\s.* - String[] fieldRegexes = new String[]{"\\(.*\\)\n", "(?i)bus:slot.func\\s=\\s.*,", - "(?i)FPGA temperature\\s=\\s.*", "(?i)Total\\sCard\\sPower\\sUsage\\s=\\s.*"}; - String[] fields = new String[4]; - String tempFieldValue; - for (int i = 0; i < fieldRegexes.length; i++) { - Matcher fieldMatcher = Pattern.compile(fieldRegexes[i]).matcher(section); - if (!fieldMatcher.find()) { - LOG.warn("Couldn't find " + fieldRegexes[i] + " pattern"); - fields[i] = ""; - continue; - } - tempFieldValue = fieldMatcher.group().trim(); - if (i == 0) { - // special case for Device name - fields[i] = tempFieldValue.substring(1, tempFieldValue.length() - 1); - } else { - String ss = tempFieldValue.split("=")[1].trim(); - fields[i] = ss.substring(0, ss.length() - 1); - } - } - String majorMinorNumber = this.shell.getMajorAndMinorNumber(fields[0]); - if (null != majorMinorNumber) { - String[] mmn = majorMinorNumber.split(":"); - this.aliasMap.put(majorMinorNumber, aliasName); - list.add(new FpgaResourceAllocator.FpgaDevice(getFpgaType(), - Integer.parseInt(mmn[0]), - Integer.parseInt(mmn[1]), null, - fields[0], aliasName, fields[1], fields[2], fields[3])); - } - }// end while - }// end if } public String getDiagnoseInfo(int timeout) { @@ -328,72 +220,78 @@ public String getFpgaType() { } @Override - public String downloadIP(String id, String dstDir, Map> localizedResources) { + public String retrieveIPfilePath(String id, String dstDir, + Map> localizedResources) { // Assume .aocx IP file is distributed by DS to local dir - String r = ""; - Path path; - LOG.info("Got environment: " + id + ", search IP file in localized resources"); + String ipFilePath = null; + + LOG.info("Got environment: " + id + + ", search IP file in localized resources"); if (null == id || id.isEmpty()) { LOG.warn("IP_ID environment is empty, skip downloading"); - return r; + return null; } + if (localizedResources != null) { - for (Map.Entry> resourceEntry : - localizedResources.entrySet()) { - path = resourceEntry.getKey(); - LOG.debug("Check:" + path.toUri().toString()); - if (path.getName().toLowerCase().contains(id.toLowerCase()) && path.getName().endsWith(".aocx")) { - r = path.toUri().toString(); - LOG.debug("Found: " + r); - break; - } + Optional aocxPath = localizedResources + .keySet() + .stream() + .filter(path -> matchesIpid(path, id)) + .findFirst(); + + if (aocxPath.isPresent()) { + ipFilePath = aocxPath.get().toUri().toString(); + LOG.debug("Found: " + ipFilePath); } } else { LOG.warn("Localized resource is null!"); } - return r; + + return ipFilePath; + } + + private boolean matchesIpid(Path p, String id) { + return p.getName().toLowerCase().equals(id.toLowerCase()) + && p.getName().endsWith(".aocx"); } /** * Program one device. - * It's ok for the offline "aocl program" failed because the application will always invoke API to program - * The reason we do offline reprogramming is to make the application's program process faster + * It's ok for the offline "aocl program" failed because the application will + * always invoke API to program. + * The reason we do offline reprogramming is to make the application's + * program process faster. * @param ipPath the absolute path to the aocx IP file - * @param majorMinorNumber major:minor string - * @return True or False + * @param device Fpga device object which represents the card + * @return false if programming the card fails * */ @Override - public boolean configureIP(String ipPath, String majorMinorNumber) { + public boolean configureIP(String ipPath, FpgaDevice device) { // perform offline program the IP to get a quickest reprogramming sequence - // we need a mapping of "major:minor" to "acl0" to issue command "aocl program " + // we need a mapping of "major:minor" to "acl0" to + // issue command "aocl program " Shell.ShellCommandExecutor shexec; String aclName; - aclName = this.aliasMap.get(majorMinorNumber); + aclName = device.getAliasDevName(); shexec = new Shell.ShellCommandExecutor( new String[]{this.pathToExecutable, "program", aclName, ipPath}); try { shexec.execute(); if (0 == shexec.getExitCode()) { LOG.debug(shexec.getOutput()); - LOG.info("Intel aocl program " + ipPath + " to " + aclName + " successfully"); + LOG.info("Intel aocl program " + ipPath + " to " + + aclName + " successfully"); } else { + LOG.error("Device programming failed, aocl output is:"); + LOG.error(shexec.getOutput()); return false; } } catch (IOException e) { - LOG.error("Intel aocl program " + ipPath + " to " + aclName + " failed!"); - e.printStackTrace(); + LOG.error("Intel aocl program " + ipPath + " to " + + aclName + " failed!", e); + LOG.error("Aocl output: " + shexec.getOutput()); return false; } return true; } - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - - @Override - public Configuration getConf() { - return this.conf; - } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/package-info.java new file mode 100644 index 0000000000..4dceb786f3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/package-info.java @@ -0,0 +1,19 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga; \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/TestFpgaResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/TestFpgaResourceHandler.java index 499c1e14ef..1048fecd10 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/TestFpgaResourceHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/fpga/TestFpgaResourceHandler.java @@ -18,9 +18,35 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.yarn.api.records.*; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; @@ -30,6 +56,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator.FpgaDevice; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceSet; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.FpgaDiscoverer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.IntelFpgaOpenclPlugin; @@ -38,15 +65,6 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.mockito.Mockito; - - -import java.io.IOException; -import java.util.*; -import java.util.concurrent.ConcurrentHashMap; - -import static org.mockito.Mockito.*; - public class TestFpgaResourceHandler { private Context mockContext; @@ -95,16 +113,20 @@ public void testBootstrap() throws ResourceHandlerException { verify(mockVendorPlugin, times(1)).initPlugin(configuration); verify(mockCGroupsHandler, times(1)).initializeCGroupController( CGroupsHandler.CGroupController.DEVICES); - Assert.assertEquals(5, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); - Assert.assertEquals(5, fpgaResourceHandler.getFpgaAllocator().getAllowedFpga().size()); + Assert.assertEquals(5, fpgaResourceHandler.getFpgaAllocator() + .getAvailableFpgaCount()); + Assert.assertEquals(5, fpgaResourceHandler.getFpgaAllocator() + .getAllowedFpga().size()); // Case 2. subset of devices fpgaResourceHandler = new FpgaResourceHandlerImpl(mockContext, mockCGroupsHandler, mockPrivilegedExecutor, mockVendorPlugin); allowed = "0,1,2"; configuration.set(YarnConfiguration.NM_FPGA_ALLOWED_DEVICES, allowed); fpgaResourceHandler.bootstrap(configuration); - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getAllowedFpga().size()); - List allowedDevices = fpgaResourceHandler.getFpgaAllocator().getAllowedFpga(); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getAllowedFpga().size()); + List allowedDevices = + fpgaResourceHandler.getFpgaAllocator().getAllowedFpga(); for (String s : allowed.split(",")) { boolean check = false; for (FpgaResourceAllocator.FpgaDevice device : allowedDevices) { @@ -114,7 +136,8 @@ public void testBootstrap() throws ResourceHandlerException { } Assert.assertTrue("Minor:" + s +"found", check); } - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); // Case 3. User configuration contains invalid minor device number fpgaResourceHandler = new FpgaResourceHandlerImpl(mockContext, @@ -122,18 +145,23 @@ public void testBootstrap() throws ResourceHandlerException { allowed = "0,1,7"; configuration.set(YarnConfiguration.NM_FPGA_ALLOWED_DEVICES, allowed); fpgaResourceHandler.bootstrap(configuration); - Assert.assertEquals(2, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); - Assert.assertEquals(2, fpgaResourceHandler.getFpgaAllocator().getAllowedFpga().size()); + Assert.assertEquals(2, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(2, + fpgaResourceHandler.getFpgaAllocator().getAllowedFpga().size()); } @Test - public void testBootstrapWithInvalidUserConfiguration() throws ResourceHandlerException { + public void testBootstrapWithInvalidUserConfiguration() + throws ResourceHandlerException { // User configuration contains invalid minor device number String allowed = "0,1,7"; configuration.set(YarnConfiguration.NM_FPGA_ALLOWED_DEVICES, allowed); fpgaResourceHandler.bootstrap(configuration); - Assert.assertEquals(2, fpgaResourceHandler.getFpgaAllocator().getAllowedFpga().size()); - Assert.assertEquals(2, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(2, + fpgaResourceHandler.getFpgaAllocator().getAllowedFpga().size()); + Assert.assertEquals(2, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); String[] invalidAllowedStrings = {"a,1,2,", "a,1,2", "0,1,2,#", "a", "1,"}; for (String s : invalidAllowedStrings) { @@ -161,7 +189,8 @@ public void testBootstrapWithInvalidUserConfiguration() throws ResourceHandlerEx } @Test - public void testBootStrapWithEmptyUserConfiguration() throws ResourceHandlerException { + public void testBootStrapWithEmptyUserConfiguration() + throws ResourceHandlerException { // User configuration contains invalid minor device number String allowed = ""; boolean invalidConfiguration = false; @@ -175,7 +204,8 @@ public void testBootStrapWithEmptyUserConfiguration() throws ResourceHandlerExce } @Test - public void testAllocationWithPreference() throws ResourceHandlerException, PrivilegedOperationException { + public void testAllocationWithPreference() + throws ResourceHandlerException, PrivilegedOperationException { configuration.set(YarnConfiguration.NM_FPGA_ALLOWED_DEVICES, "0,1,2"); fpgaResourceHandler.bootstrap(configuration); // Case 1. The id-0 container request 1 FPGA of IntelOpenCL type and GEMM IP @@ -197,18 +227,24 @@ public void testAllocationWithPreference() throws ResourceHandlerException, Priv Assert.assertTrue(flag); // Case 3. Release the id-0 container fpgaResourceHandler.postComplete(getContainerId(0)); - Assert.assertEquals(0, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(0, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); // Now we have enough devices, re-allocate for the id-1 container fpgaResourceHandler.preStart(mockContainer(1, 3, "GEMM")); // Id-1 container should have 0 denied devices verifyDeniedDevices(getContainerId(1), new ArrayList<>()); - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(0, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(0, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); // Release container id-1 fpgaResourceHandler.postComplete(getContainerId(1)); - Assert.assertEquals(0, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(0, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); // Case 4. Now all 3 devices should have IPID GEMM // Try container id-2 and id-3 fpgaResourceHandler.preStart(mockContainer(2, 1, "GZIP")); @@ -221,18 +257,24 @@ public void testAllocationWithPreference() throws ResourceHandlerException, Priv for (FpgaResourceAllocator.FpgaDevice device : list) { Assert.assertEquals("IPID should be GEMM", "GEMM", device.getIPID()); } - Assert.assertEquals(2, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(1, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(2, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(1, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); fpgaResourceHandler.postComplete(getContainerId(3)); - Assert.assertEquals(0, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(0, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); // Case 5. id-4 request 0 FPGA device fpgaResourceHandler.preStart(mockContainer(4, 0, "")); // Deny all devices for id-4 verifyDeniedDevices(getContainerId(4), Arrays.asList(0, 1, 2)); - Assert.assertEquals(0, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(0, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); // Case 6. id-5 with invalid FPGA device try { @@ -243,53 +285,68 @@ public void testAllocationWithPreference() throws ResourceHandlerException, Priv } @Test - public void testsAllocationWithExistingIPIDDevices() throws ResourceHandlerException, PrivilegedOperationException { + public void testsAllocationWithExistingIPIDDevices() + throws ResourceHandlerException, PrivilegedOperationException { configuration.set(YarnConfiguration.NM_FPGA_ALLOWED_DEVICES, "0,1,2"); fpgaResourceHandler.bootstrap(configuration); // The id-0 container request 3 FPGA of IntelOpenCL type and GEMM IP fpgaResourceHandler.preStart(mockContainer(0, 3, "GEMM")); - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - List list = fpgaResourceHandler.getFpgaAllocator() - .getUsedFpga().get(getContainerId(0).toString()); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + List list = + fpgaResourceHandler + .getFpgaAllocator() + .getUsedFpga() + .get(getContainerId(0).toString()); fpgaResourceHandler.postComplete(getContainerId(0)); for (FpgaResourceAllocator.FpgaDevice device : list) { - Assert.assertEquals("IP should be updated to GEMM", "GEMM", device.getIPID()); + Assert.assertEquals("IP should be updated to GEMM", "GEMM", + device.getIPID()); } // Case 1. id-1 container request preStart, with no plugin.configureIP called fpgaResourceHandler.preStart(mockContainer(1, 1, "GEMM")); fpgaResourceHandler.preStart(mockContainer(2, 1, "GEMM")); // we should have 3 times due to id-1 skip 1 invocation - verify(mockVendorPlugin, times(3)).configureIP(anyString(),anyString()); + verify(mockVendorPlugin, times(3)).configureIP(anyString(), + any(FpgaDevice.class)); fpgaResourceHandler.postComplete(getContainerId(1)); fpgaResourceHandler.postComplete(getContainerId(2)); // Case 2. id-2 container request preStart, with 1 plugin.configureIP called fpgaResourceHandler.preStart(mockContainer(1, 1, "GZIP")); // we should have 4 times invocation - verify(mockVendorPlugin, times(4)).configureIP(anyString(),anyString()); + verify(mockVendorPlugin, times(4)).configureIP(anyString(), + any(FpgaDevice.class)); } @Test - public void testAllocationWithZeroDevices() throws ResourceHandlerException, PrivilegedOperationException { + public void testAllocationWithZeroDevices() + throws ResourceHandlerException, PrivilegedOperationException { configuration.set(YarnConfiguration.NM_FPGA_ALLOWED_DEVICES, "0,1,2"); fpgaResourceHandler.bootstrap(configuration); // The id-0 container request 0 FPGA fpgaResourceHandler.preStart(mockContainer(0, 0, null)); verifyDeniedDevices(getContainerId(0), Arrays.asList(0, 1, 2)); - verify(mockVendorPlugin, times(0)).downloadIP(anyString(), anyString(), anyMap()); - verify(mockVendorPlugin, times(0)).configureIP(anyString(), anyString()); + verify(mockVendorPlugin, times(0)).retrieveIPfilePath(anyString(), + anyString(), anyMap()); + verify(mockVendorPlugin, times(0)).configureIP(anyString(), + any(FpgaDevice.class)); } @Test - public void testStateStore() throws ResourceHandlerException, IOException { + public void testStateStore() + throws ResourceHandlerException, IOException { // Case 1. store 3 devices configuration.set(YarnConfiguration.NM_FPGA_ALLOWED_DEVICES, "0,1,2"); fpgaResourceHandler.bootstrap(configuration); Container container0 = mockContainer(0, 3, "GEMM"); fpgaResourceHandler.preStart(container0); List assigned = - fpgaResourceHandler.getFpgaAllocator().getUsedFpga().get(getContainerId(0).toString()); + fpgaResourceHandler + .getFpgaAllocator() + .getUsedFpga() + .get(getContainerId(0).toString()); verify(mockNMStateStore).storeAssignedResources(container0, ResourceInformation.FPGA_URI, new ArrayList<>(assigned)); @@ -303,22 +360,26 @@ public void testStateStore() throws ResourceHandlerException, IOException { @Test public void testReacquireContainer() throws ResourceHandlerException { - Container c0 = mockContainer(0, 2, "GEMM"); List assigned = new ArrayList<>(); - assigned.add(new FpgaResourceAllocator.FpgaDevice(vendorType, 247, 0, null)); - assigned.add(new FpgaResourceAllocator.FpgaDevice(vendorType, 247, 1, null)); + assigned.add(new + FpgaResourceAllocator.FpgaDevice(vendorType, 247, 0, null)); + assigned.add(new + FpgaResourceAllocator.FpgaDevice(vendorType, 247, 1, null)); // Mock we've stored the c0 states mockStateStoreForContainer(c0, assigned); // NM start configuration.set(YarnConfiguration.NM_FPGA_ALLOWED_DEVICES, "0,1,2"); fpgaResourceHandler.bootstrap(configuration); - Assert.assertEquals(0, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(0, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); // Case 1. try recover state for id-0 container fpgaResourceHandler.reacquireContainer(getContainerId(0)); // minor number matches - List used = fpgaResourceHandler.getFpgaAllocator(). + List used = + fpgaResourceHandler.getFpgaAllocator(). getUsedFpga().get(getContainerId(0).toString()); int count = 0; for (FpgaResourceAllocator.FpgaDevice device : used) { @@ -330,21 +391,26 @@ public void testReacquireContainer() throws ResourceHandlerException { } } Assert.assertEquals("Unexpected used minor number in allocator",2, count); - List available = fpgaResourceHandler.getFpgaAllocator(). - getAvailableFpga().get(vendorType); + List available = + fpgaResourceHandler + .getFpgaAllocator() + .getAvailableFpga() + .get(vendorType); count = 0; for (FpgaResourceAllocator.FpgaDevice device : available) { if (device.getMinor().equals(2)) { count++; } } - Assert.assertEquals("Unexpected available minor number in allocator", 1, count); + Assert.assertEquals("Unexpected available minor number in allocator", + 1, count); // Case 2. Recover a not allowed device with minor number 5 Container c1 = mockContainer(1, 1, "GEMM"); assigned = new ArrayList<>(); - assigned.add(new FpgaResourceAllocator.FpgaDevice(vendorType, 247, 5, null)); + assigned.add(new + FpgaResourceAllocator.FpgaDevice(vendorType, 247, 5, null)); // Mock we've stored the c1 states mockStateStoreForContainer(c1, assigned); boolean flag = false; @@ -354,13 +420,16 @@ public void testReacquireContainer() throws ResourceHandlerException { flag = true; } Assert.assertTrue(flag); - Assert.assertEquals(2, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(1, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(2, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(1, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); // Case 3. recover a already used device by other container Container c2 = mockContainer(2, 1, "GEMM"); assigned = new ArrayList<>(); - assigned.add(new FpgaResourceAllocator.FpgaDevice(vendorType, 247, 1, null)); + assigned.add(new + FpgaResourceAllocator.FpgaDevice(vendorType, 247, 1, null)); // Mock we've stored the c2 states mockStateStoreForContainer(c2, assigned); flag = false; @@ -370,18 +439,23 @@ public void testReacquireContainer() throws ResourceHandlerException { flag = true; } Assert.assertTrue(flag); - Assert.assertEquals(2, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(1, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(2, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(1, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); // Case 4. recover a normal container c3 with remaining minor device number 2 Container c3 = mockContainer(3, 1, "GEMM"); assigned = new ArrayList<>(); - assigned.add(new FpgaResourceAllocator.FpgaDevice(vendorType, 247, 2, null)); + assigned.add(new + FpgaResourceAllocator.FpgaDevice(vendorType, 247, 2, null)); // Mock we've stored the c2 states mockStateStoreForContainer(c3, assigned); fpgaResourceHandler.reacquireContainer(getContainerId(3)); - Assert.assertEquals(3, fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); - Assert.assertEquals(0, fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); + Assert.assertEquals(3, + fpgaResourceHandler.getFpgaAllocator().getUsedFpgaCount()); + Assert.assertEquals(0, + fpgaResourceHandler.getFpgaAllocator().getAvailableFpgaCount()); } private void verifyDeniedDevices(ContainerId containerId, @@ -405,14 +479,16 @@ private void verifyDeniedDevices(ContainerId containerId, } } - private static IntelFpgaOpenclPlugin mockPlugin(String type, List list) { + private static IntelFpgaOpenclPlugin mockPlugin(String type, + List list) { IntelFpgaOpenclPlugin plugin = mock(IntelFpgaOpenclPlugin.class); - when(plugin.initPlugin(Mockito.any())).thenReturn(true); + when(plugin.initPlugin(any())).thenReturn(true); when(plugin.getFpgaType()).thenReturn(type); - when(plugin.downloadIP(Mockito.anyString(), Mockito.anyString(), Mockito.anyMap())).thenReturn("/tmp"); - when(plugin.configureIP(Mockito.anyString(), Mockito.any())) + when(plugin.retrieveIPfilePath(anyString(), + anyString(), anyMap())).thenReturn("/tmp"); + when(plugin.configureIP(anyString(), any())) .thenReturn(true); - when(plugin.discover(Mockito.anyInt())).thenReturn(list); + when(plugin.discover(anyInt())).thenReturn(list); return plugin; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestAoclOutputParser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestAoclOutputParser.java new file mode 100644 index 0000000000..c83e6b14aa --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestAoclOutputParser.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.List; + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator.FpgaDevice; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.IntelFpgaOpenclPlugin.InnerShellExecutor; +import org.junit.Test; + +/** + * Tests for AoclDiagnosticOutputParser. + */ +@SuppressWarnings("checkstyle:linelength") +public class TestAoclOutputParser { + + @Test + public void testParsing() { + String output = "------------------------- acl0 -------------------------\n" + + "Vendor: Nallatech ltd\n" + + "Phys Dev Name Status Information\n" + + "aclnalla_pcie0Passed nalla_pcie (aclnalla_pcie0)\n" + + " PCIe dev_id = 2494, bus:slot.func = 02:00.00, Gen3 x8\n" + + " FPGA temperature = 53.1 degrees C.\n" + + " Total Card Power Usage = 31.7 Watts.\n" + + " Device Power Usage = 0.0 Watts.\n" + + "DIAGNOSTIC_PASSED" + + "---------------------------------------------------------\n"; + output = output + + "------------------------- acl1 -------------------------\n" + + "Vendor: Nallatech ltd\n" + + "Phys Dev Name Status Information\n" + + "aclnalla_pcie1Passed nalla_pcie (aclnalla_pcie1)\n" + + " PCIe dev_id = 2495, bus:slot.func = 03:00.00, Gen3 x8\n" + + " FPGA temperature = 43.1 degrees C.\n" + + " Total Card Power Usage = 11.7 Watts.\n" + + " Device Power Usage = 0.0 Watts.\n" + + "DIAGNOSTIC_PASSED" + + "---------------------------------------------------------\n"; + output = output + + "------------------------- acl2 -------------------------\n" + + "Vendor: Intel(R) Corporation\n" + + "\n" + + "Phys Dev Name Status Information\n" + + "\n" + + "acla10_ref0 Passed Arria 10 Reference Platform (acla10_ref0)\n" + + " PCIe dev_id = 2494, bus:slot.func = 09:00.00, Gen2 x8\n" + + " FPGA temperature = 50.5781 degrees C.\n" + + "\n" + + "DIAGNOSTIC_PASSED\n" + + "---------------------------------------------------------\n"; + InnerShellExecutor shellExecutor = mock(InnerShellExecutor.class); + when(shellExecutor.getMajorAndMinorNumber("aclnalla_pcie0")) + .thenReturn("247:0"); + when(shellExecutor.getMajorAndMinorNumber("aclnalla_pcie1")) + .thenReturn("247:1"); + when(shellExecutor.getMajorAndMinorNumber("acla10_ref0")) + .thenReturn("246:0"); + + List devices = + AoclDiagnosticOutputParser.parseDiagnosticOutput( + output, shellExecutor, "IntelOpenCL"); + + assertEquals(3, devices.size()); + assertEquals("IntelOpenCL", devices.get(0).getType()); + assertEquals("247", devices.get(0).getMajor().toString()); + assertEquals("0", devices.get(0).getMinor().toString()); + assertEquals("acl0", devices.get(0).getAliasDevName()); + assertEquals("aclnalla_pcie0", devices.get(0).getDevName()); + assertEquals("02:00.00", devices.get(0).getBusNum()); + assertEquals("53.1 degrees C", devices.get(0).getTemperature()); + assertEquals("31.7 Watts", devices.get(0).getCardPowerUsage()); + + assertEquals("IntelOpenCL", devices.get(1).getType()); + assertEquals("247", devices.get(1).getMajor().toString()); + assertEquals("1", devices.get(1).getMinor().toString()); + assertEquals("acl1", devices.get(1).getAliasDevName()); + assertEquals("aclnalla_pcie1", devices.get(1).getDevName()); + assertEquals("03:00.00", devices.get(1).getBusNum()); + assertEquals("43.1 degrees C", devices.get(1).getTemperature()); + assertEquals("11.7 Watts", devices.get(1).getCardPowerUsage()); + + assertEquals("IntelOpenCL", devices.get(2).getType()); + assertEquals("246", devices.get(2).getMajor().toString()); + assertEquals("0", devices.get(2).getMinor().toString()); + assertEquals("acl2", devices.get(2).getAliasDevName()); + assertEquals("acla10_ref0", devices.get(2).getDevName()); + assertEquals("09:00.00", devices.get(2).getBusNum()); + assertEquals("50.5781 degrees C", devices.get(2).getTemperature()); + assertEquals("", devices.get(2).getCardPowerUsage()); + + // Case 2. check alias map + assertEquals("acl0", devices.get(0).getAliasDevName()); + assertEquals("acl1", devices.get(1).getAliasDevName()); + assertEquals("acl2", devices.get(2).getAliasDevName()); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestFpgaDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestFpgaDiscoverer.java index f2b92d2457..870661025d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestFpgaDiscoverer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/fpga/TestFpgaDiscoverer.java @@ -20,17 +20,11 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator.FpgaDevice; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import java.io.File; import java.io.FileOutputStream; @@ -38,16 +32,20 @@ import java.lang.reflect.Field; import java.util.Collections; import java.util.HashMap; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; -import static org.junit.Assert.assertEquals; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.fpga.FpgaResourceAllocator.FpgaDevice; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; public class TestFpgaDiscoverer { @Rule @@ -114,7 +112,7 @@ public void testLinuxFpgaResourceDiscoverPluginConfig() throws Exception { // FpgaDiscoverer.getInstance().diagnose() work in openclPlugin.initPlugin() discoverer.setResourceHanderPlugin(openclPlugin); openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); + openclPlugin.setInnerShellExecutor(mockPuginShell()); discoverer.initialize(conf); // Case 1. No configuration set for binary(no environment "ALTERAOCLSDKROOT" set) @@ -157,89 +155,6 @@ public void testLinuxFpgaResourceDiscoverPluginConfig() throws Exception { } - @Test - public void testDiscoverPluginParser() throws YarnException { - String output = "------------------------- acl0 -------------------------\n" + - "Vendor: Nallatech ltd\n" + - "Phys Dev Name Status Information\n" + - "aclnalla_pcie0Passed nalla_pcie (aclnalla_pcie0)\n" + - " PCIe dev_id = 2494, bus:slot.func = 02:00.00, Gen3 x8\n" + - " FPGA temperature = 53.1 degrees C.\n" + - " Total Card Power Usage = 31.7 Watts.\n" + - " Device Power Usage = 0.0 Watts.\n" + - "DIAGNOSTIC_PASSED" + - "---------------------------------------------------------\n"; - output = output + - "------------------------- acl1 -------------------------\n" + - "Vendor: Nallatech ltd\n" + - "Phys Dev Name Status Information\n" + - "aclnalla_pcie1Passed nalla_pcie (aclnalla_pcie1)\n" + - " PCIe dev_id = 2495, bus:slot.func = 03:00.00, Gen3 x8\n" + - " FPGA temperature = 43.1 degrees C.\n" + - " Total Card Power Usage = 11.7 Watts.\n" + - " Device Power Usage = 0.0 Watts.\n" + - "DIAGNOSTIC_PASSED" + - "---------------------------------------------------------\n"; - output = output + - "------------------------- acl2 -------------------------\n" + - "Vendor: Intel(R) Corporation\n" + - "\n" + - "Phys Dev Name Status Information\n" + - "\n" + - "acla10_ref0 Passed Arria 10 Reference Platform (acla10_ref0)\n" + - " PCIe dev_id = 2494, bus:slot.func = 09:00.00, Gen2 x8\n" + - " FPGA temperature = 50.5781 degrees C.\n" + - "\n" + - "DIAGNOSTIC_PASSED\n" + - "---------------------------------------------------------\n"; - Configuration conf = new Configuration(false); - IntelFpgaOpenclPlugin openclPlugin = new IntelFpgaOpenclPlugin(); - FpgaDiscoverer.getInstance().setResourceHanderPlugin(openclPlugin); - - openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); - - FpgaDiscoverer.getInstance().initialize(conf); - - List list = new LinkedList<>(); - - // Case 1. core parsing - openclPlugin.parseDiagnoseInfo(output, list); - assertEquals(3, list.size()); - assertEquals("IntelOpenCL", list.get(0).getType()); - assertEquals("247", list.get(0).getMajor().toString()); - assertEquals("0", list.get(0).getMinor().toString()); - assertEquals("acl0", list.get(0).getAliasDevName()); - assertEquals("aclnalla_pcie0", list.get(0).getDevName()); - assertEquals("02:00.00", list.get(0).getBusNum()); - assertEquals("53.1 degrees C", list.get(0).getTemperature()); - assertEquals("31.7 Watts", list.get(0).getCardPowerUsage()); - - assertEquals("IntelOpenCL", list.get(1).getType()); - assertEquals("247", list.get(1).getMajor().toString()); - assertEquals("1", list.get(1).getMinor().toString()); - assertEquals("acl1", list.get(1).getAliasDevName()); - assertEquals("aclnalla_pcie1", list.get(1).getDevName()); - assertEquals("03:00.00", list.get(1).getBusNum()); - assertEquals("43.1 degrees C", list.get(1).getTemperature()); - assertEquals("11.7 Watts", list.get(1).getCardPowerUsage()); - - assertEquals("IntelOpenCL", list.get(2).getType()); - assertEquals("246", list.get(2).getMajor().toString()); - assertEquals("0", list.get(2).getMinor().toString()); - assertEquals("acl2", list.get(2).getAliasDevName()); - assertEquals("acla10_ref0", list.get(2).getDevName()); - assertEquals("09:00.00", list.get(2).getBusNum()); - assertEquals("50.5781 degrees C", list.get(2).getTemperature()); - assertEquals("", list.get(2).getCardPowerUsage()); - - // Case 2. check alias map - Map aliasMap = openclPlugin.getAliasMap(); - assertEquals("acl0", aliasMap.get("247:0")); - assertEquals("acl1", aliasMap.get("247:1")); - assertEquals("acl2", aliasMap.get("246:0")); - } - @Test public void testDiscoveryWhenAvailableDevicesDefined() throws YarnException { @@ -251,7 +166,7 @@ public void testDiscoveryWhenAvailableDevicesDefined() IntelFpgaOpenclPlugin openclPlugin = new IntelFpgaOpenclPlugin(); discoverer.setResourceHanderPlugin(openclPlugin); openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); + openclPlugin.setInnerShellExecutor(mockPuginShell()); discoverer.initialize(conf); List devices = discoverer.discover(); @@ -282,7 +197,7 @@ public void testDiscoveryWhenAvailableDevicesEmpty() IntelFpgaOpenclPlugin openclPlugin = new IntelFpgaOpenclPlugin(); discoverer.setResourceHanderPlugin(openclPlugin); openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); + openclPlugin.setInnerShellExecutor(mockPuginShell()); discoverer.initialize(conf); discoverer.discover(); @@ -302,7 +217,7 @@ public void testDiscoveryWhenAvailableDevicesAreIllegalString() IntelFpgaOpenclPlugin openclPlugin = new IntelFpgaOpenclPlugin(); discoverer.setResourceHanderPlugin(openclPlugin); openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); + openclPlugin.setInnerShellExecutor(mockPuginShell()); discoverer.initialize(conf); discoverer.discover(); @@ -319,7 +234,7 @@ public void testDiscoveryWhenExternalScriptDefined() IntelFpgaOpenclPlugin openclPlugin = new IntelFpgaOpenclPlugin(); discoverer.setResourceHanderPlugin(openclPlugin); openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); + openclPlugin.setInnerShellExecutor(mockPuginShell()); discoverer.setScriptRunner(s -> { return Optional.of("acl0/243:0,acl1/244:1"); }); @@ -352,7 +267,7 @@ public void testDiscoveryWhenExternalScriptReturnsEmptyString() IntelFpgaOpenclPlugin openclPlugin = new IntelFpgaOpenclPlugin(); discoverer.setResourceHanderPlugin(openclPlugin); openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); + openclPlugin.setInnerShellExecutor(mockPuginShell()); discoverer.setScriptRunner(s -> { return Optional.of(""); }); @@ -361,7 +276,6 @@ public void testDiscoveryWhenExternalScriptReturnsEmptyString() } @Test - public void testDiscoveryWhenExternalScriptFails() throws YarnException { expected.expect(ResourceHandlerException.class); @@ -375,7 +289,7 @@ public void testDiscoveryWhenExternalScriptFails() IntelFpgaOpenclPlugin openclPlugin = new IntelFpgaOpenclPlugin(); discoverer.setResourceHanderPlugin(openclPlugin); openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); + openclPlugin.setInnerShellExecutor(mockPuginShell()); discoverer.setScriptRunner(s -> { return Optional.empty(); }); @@ -396,7 +310,7 @@ public void testDiscoveryWhenExternalScriptUndefined() IntelFpgaOpenclPlugin openclPlugin = new IntelFpgaOpenclPlugin(); discoverer.setResourceHanderPlugin(openclPlugin); openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); + openclPlugin.setInnerShellExecutor(mockPuginShell()); discoverer.initialize(conf); discoverer.discover(); @@ -421,7 +335,7 @@ public void testDiscoveryWhenExternalScriptCannotBeExecuted() IntelFpgaOpenclPlugin openclPlugin = new IntelFpgaOpenclPlugin(); discoverer.setResourceHanderPlugin(openclPlugin); openclPlugin.initPlugin(conf); - openclPlugin.setShell(mockPuginShell()); + openclPlugin.setInnerShellExecutor(mockPuginShell()); discoverer.initialize(conf); discoverer.discover();