diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 4a0f0057f1..bcd48fc118 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -4,10 +4,6 @@ Trunk (unreleased changes)
 
   INCOMPATIBLE CHANGES
 
-    HADOOP-7542. Change Configuration XML format to 1.1 to add support for
-    serializing additional characters. This requires XML1.1
-    support in the XML parser (Christopher Egner via harsh)
-
   IMPROVEMENTS
 
     HADOOP-7595. Upgrade dependency to Avro 1.5.3. (Alejandro Abdelnur via atm)
@@ -21,10 +17,19 @@ Trunk (unreleased changes)
     close (atm)
 
     HADOOP-7668. Add a NetUtils method that can tell if an InetAddress
-    belongs to local host. (suresh)
+    belongs to local host. (suresh)
 
     HADOOP-7687 Make getProtocolSignature public (sanjay)
 
+    HADOOP-7693. Enhance AvroRpcEngine to support the new #addProtocol
+    interface introduced in HADOOP-7524. (cutting)
+
+    HADOOP-7716. RPC protocol registration on SS does not log the protocol name
+    (only the class which may be different) (sanjay)
+
+    HADOOP-7717. Move handling of concurrent client fail-overs to
+    RetryInvocationHandler (atm)
+
   BUGS
 
     HADOOP-7606. Upgrade Jackson to version 1.7.1 to match the version required
@@ -42,7 +47,12 @@ Trunk (unreleased changes)
 
     HADOOP-6220. HttpServer wraps InterruptedExceptions by IOExceptions if
     interrupted in startup (stevel)
-
+
+    HADOOP-7703. Improved exception handling of shutting down web server.
+    (Devaraj K via Eric Yang)
+
+    HADOOP-7704. Reduce number of objects created by JMXJsonServlet.
+    (Devaraj K via Eric Yang)
 
 Release 0.23.0 - Unreleased
 
@@ -413,6 +423,18 @@ Release 0.23.0 - Unreleased
     HADOOP-7575. Enhanced LocalDirAllocator to support fully-qualified paths.
     (Jonathan Eagles via vinodkv)
 
+    HADOOP-7469 Add a standard handler for socket connection problems which
+    improves diagnostics (Uma Maheswara Rao G and stevel via stevel)
+
+    HADOOP-7710. Added hadoop-setup-application.sh for creating
+    application directory (Arpit Gupta via Eric Yang)
+
+    HADOOP-7707. Added toggle for dfs.support.append, webhdfs and hadoop proxy
+    user to setup config script. (Arpit Gupta via Eric Yang)
+
+    HADOOP-7720. Added parameter for HBase user to setup config script.
+    (Arpit Gupta via Eric Yang)
+
   OPTIMIZATIONS
 
     HADOOP-7333. Performance improvement in PureJavaCrc32. (Eric Caspole
@@ -634,6 +656,31 @@ Release 0.23.0 - Unreleased
     HADOOP-7662. Fixed logs servlet to use the pathspec '/*' instead of '/'
     for correct filtering. (Thomas Graves via vinodkv)
 
+    HADOOP-7691. Fixed conflict uid for install packages. (Eric Yang)
+
+    HADOOP-7603. Set hdfs, mapred uid, and hadoop uid to fixed numbers.
+    (Eric Yang)
+
+    HADOOP-7658. Fixed HADOOP_SECURE_DN_USER environment variable in
+    hadoop-env.sh (Eric Yang)
+
+    HADOOP-7684. Added init.d script for jobhistory server and
+    secondary namenode. (Eric Yang)
+
+    HADOOP-7715. Removed unnecessary security logger configuration. (Eric Yang)
+
+    HADOOP-7685. Improved directory ownership check function in
+    hadoop-setup-conf.sh. (Eric Yang)
+
+    HADOOP-7711. Fixed recursive sourcing of HADOOP_OPTS environment
+    variables (Arpit Gupta via Eric Yang)
+
+    HADOOP-7681. Fixed security and hdfs audit log4j properties
+    (Arpit Gupta via Eric Yang)
+
+    HADOOP-7708. Fixed hadoop-setup-conf.sh to handle config files
+    consistently. (Eric Yang)
+
 Release 0.22.0 - Unreleased
 
   INCOMPATIBLE CHANGES
 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index c310aa65e6..4fb1d19066 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -1632,10 +1632,6 @@ private synchronized Document asXmlDocument() throws IOException {
     try {
       doc =
         DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
-
-      // Allow a broader set of control characters to appear in job confs.
-      // cf https://issues.apache.org/jira/browse/MAPREDUCE-109
-      doc.setXmlVersion( "1.1" );
     } catch (ParserConfigurationException pe) {
       throw new IOException(pe);
     }
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java
index c526e10286..37b89f4f89 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java
@@ -210,7 +210,7 @@ public HttpServer(String name, String bindAddress, int port,
     webServer.setHandler(contexts);
 
     webAppContext = new WebAppContext();
-    webAppContext.setDisplayName("WepAppsContext");
+    webAppContext.setDisplayName(name);
     webAppContext.setContextPath("/");
     webAppContext.setWar(appDir + "/" + name);
     webAppContext.getServletContext().setAttribute(CONF_CONTEXT_ATTRIBUTE, conf);
@@ -696,8 +696,44 @@ public void start() throws IOException {
    * stop the server
    */
   public void stop() throws Exception {
-    listener.close();
-    webServer.stop();
+    MultiException exception = null;
+    try {
+      listener.close();
+    } catch (Exception e) {
+      LOG.error("Error while stopping listener for webapp"
+          + webAppContext.getDisplayName(), e);
+      exception = addMultiException(exception, e);
+    }
+
+    try {
+      // clear & stop webAppContext attributes to avoid memory leaks.
+      webAppContext.clearAttributes();
+      webAppContext.stop();
+    } catch (Exception e) {
+      LOG.error("Error while stopping web app context for webapp "
+          + webAppContext.getDisplayName(), e);
+      exception = addMultiException(exception, e);
+    }
+    try {
+      webServer.stop();
+    } catch (Exception e) {
+      LOG.error("Error while stopping web server for webapp "
+          + webAppContext.getDisplayName(), e);
+      exception = addMultiException(exception, e);
+    }
+
+    if (exception != null) {
+      exception.ifExceptionThrow();
+    }
+
+  }
+
+  private MultiException addMultiException(MultiException exception, Exception e) {
+    if(exception == null){
+      exception = new MultiException();
+    }
+    exception.add(e);
+    return exception;
   }
 
   public void join() throws InterruptedException {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java
index 6ea45ced67..1f3878f1cc 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java
@@ -24,6 +24,7 @@ import java.lang.reflect.Method;
 import java.util.Collections;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -32,6 +33,11 @@ class RetryInvocationHandler implements InvocationHandler, Closeable {
   public static final Log LOG = LogFactory.getLog(RetryInvocationHandler.class);
   private FailoverProxyProvider proxyProvider;
+
+  /**
+   * The number of times the associated proxyProvider has ever been failed over.
+   */
+  private long proxyProviderFailoverCount = 0;
 
   private RetryPolicy defaultPolicy;
   private Map methodNameToPolicyMap;
@@ -60,16 +66,24 @@ public Object invoke(Object proxy, Method method, Object[] args)
       policy = defaultPolicy;
     }
 
-    int failovers = 0;
+    // The number of times this method invocation has been failed over.
+    int invocationFailoverCount = 0;
     int retries = 0;
     while (true) {
+      // The number of times this invocation handler has ever been failed over,
+      // before this method invocation attempt. Used to prevent concurrent
+      // failed method invocations from triggering multiple failover attempts.
+      long invocationAttemptFailoverCount;
+      synchronized (proxyProvider) {
+        invocationAttemptFailoverCount = proxyProviderFailoverCount;
+      }
       try {
         return invokeMethod(method, args);
       } catch (Exception e) {
         boolean isMethodIdempotent = proxyProvider.getInterface()
             .getMethod(method.getName(), method.getParameterTypes())
             .isAnnotationPresent(Idempotent.class);
-        RetryAction action = policy.shouldRetry(e, retries++, failovers,
+        RetryAction action = policy.shouldRetry(e, retries++, invocationFailoverCount,
             isMethodIdempotent);
         if (action == RetryAction.FAIL) {
           LOG.warn("Exception while invoking " + method.getName()
@@ -81,10 +95,24 @@ public Object invoke(Object proxy, Method method, Object[] args)
         } else if (action == RetryAction.FAILOVER_AND_RETRY) {
           LOG.warn("Exception while invoking " + method.getName()
               + " of " + currentProxy.getClass()
-              + ". Trying to fail over.", e);
-          failovers++;
-          proxyProvider.performFailover(currentProxy);
+              + " after " + invocationFailoverCount + " fail over attempts."
+              + " Trying to fail over.", e);
+          // Make sure that concurrent failed method invocations only cause a
+          // single actual fail over.
+ synchronized (proxyProvider) { + if (invocationAttemptFailoverCount == proxyProviderFailoverCount) { + proxyProvider.performFailover(currentProxy); + proxyProviderFailoverCount++; + } else { + LOG.warn("A failover has occurred since the start of this method" + + " invocation attempt."); + } + } + // The call to getProxy() could technically only be made in the event + // performFailover() is called, but it needs to be out here for the + // purpose of testing. currentProxy = proxyProvider.getProxy(); + invocationFailoverCount++; } if(LOG.isDebugEnabled()) { LOG.debug("Exception while invoking " + method.getName() diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AvroRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AvroRpcEngine.java index 1b73351bf1..12aa04ff85 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AvroRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AvroRpcEngine.java @@ -29,6 +29,8 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.HashMap; import javax.net.SocketFactory; @@ -54,7 +56,7 @@ public class AvroRpcEngine implements RpcEngine { private static final Log LOG = LogFactory.getLog(RPC.class); - private static int VERSION = 0; + private static int VERSION = 1; // the implementation we tunnel through private static final RpcEngine ENGINE = new WritableRpcEngine(); @@ -62,9 +64,10 @@ public class AvroRpcEngine implements RpcEngine { /** Tunnel an Avro RPC request and response through Hadoop's RPC. */ private static interface TunnelProtocol extends VersionedProtocol { //WritableRpcEngine expects a versionID in every protocol. - public static final long versionID = 0L; + public static final long versionID = VERSION; /** All Avro methods and responses go through this. 
*/ - BufferListWritable call(BufferListWritable request) throws IOException; + BufferListWritable call(String protocol, BufferListWritable request) + throws IOException; } /** A Writable that holds a List, The Avro RPC Transceiver's @@ -103,23 +106,25 @@ public void write(DataOutput out) throws IOException { private static class ClientTransceiver extends Transceiver { private TunnelProtocol tunnel; private InetSocketAddress remote; + private String protocol; public ClientTransceiver(InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, - int rpcTimeout) + int rpcTimeout, String protocol) throws IOException { this.tunnel = ENGINE.getProxy(TunnelProtocol.class, VERSION, addr, ticket, conf, factory, rpcTimeout).getProxy(); this.remote = addr; + this.protocol = protocol; } public String getRemoteName() { return remote.toString(); } public List transceive(List request) throws IOException { - return tunnel.call(new BufferListWritable(request)).buffers; + return tunnel.call(protocol, new BufferListWritable(request)).buffers; } public List readBuffers() throws IOException { @@ -159,7 +164,8 @@ public Invoker(Class protocol, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, int rpcTimeout) throws IOException { - this.tx = new ClientTransceiver(addr, ticket, conf, factory, rpcTimeout); + this.tx = new ClientTransceiver(addr, ticket, conf, factory, rpcTimeout, + protocol.getName()); this.requestor = createRequestor(protocol, tx); } @Override public Object invoke(Object proxy, Method method, Object[] args) @@ -182,9 +188,11 @@ protected Responder createResponder(Class iface, Object impl) { /** An Avro RPC Responder that can process requests passed via Hadoop RPC. */ private class TunnelResponder implements TunnelProtocol { - private Responder responder; - public TunnelResponder(Class iface, Object impl) { - responder = createResponder(iface, impl); + private Map responders = + new HashMap(); + + public void addProtocol(Class iface, Object impl) { + responders.put(iface.getName(), createResponder(iface, impl)); } @Override @@ -197,13 +205,18 @@ public long getProtocolVersion(String protocol, long version) public ProtocolSignature getProtocolSignature( String protocol, long version, int clientMethodsHashCode) throws IOException { - return new ProtocolSignature(VERSION, null); + return ProtocolSignature.getProtocolSignature + (clientMethodsHashCode, VERSION, TunnelProtocol.class); } - public BufferListWritable call(final BufferListWritable request) + public BufferListWritable call(String protocol, BufferListWritable request) throws IOException { + Responder responder = responders.get(protocol); + if (responder == null) + throw new IOException("No responder for: "+protocol); return new BufferListWritable(responder.respond(request.buffers)); } + } public Object[] call(Method method, Object[][] params, @@ -212,6 +225,32 @@ public Object[] call(Method method, Object[][] params, throw new UnsupportedOperationException(); } + private class Server extends WritableRpcEngine.Server { + private TunnelResponder responder = new TunnelResponder(); + + public Server(Class iface, Object impl, String bindAddress, + int port, int numHandlers, int numReaders, + int queueSizePerHandler, boolean verbose, + Configuration conf, + SecretManager secretManager + ) throws IOException { + super((Class)null, new Object(), conf, + bindAddress, port, numHandlers, numReaders, + queueSizePerHandler, verbose, secretManager); + 
super.addProtocol(TunnelProtocol.class, responder); + responder.addProtocol(iface, impl); + } + + + @Override + public Server + addProtocol(Class protocolClass, IMPL protocolImpl) + throws IOException { + responder.addProtocol(protocolClass, protocolImpl); + return this; + } + } + /** Construct a server for a protocol implementation instance listening on a * port and address. */ public RPC.Server getServer(Class iface, Object impl, String bindAddress, @@ -220,10 +259,9 @@ public RPC.Server getServer(Class iface, Object impl, String bindAddress, Configuration conf, SecretManager secretManager ) throws IOException { - return ENGINE.getServer(TunnelProtocol.class, - new TunnelResponder(iface, impl), - bindAddress, port, numHandlers, numReaders, - queueSizePerHandler, verbose, conf, secretManager); + return new Server + (iface, impl, bindAddress, port, numHandlers, numReaders, + queueSizePerHandler, verbose, conf, secretManager); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index c339ce7eaa..58cf810186 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -23,8 +23,6 @@ import java.net.InetSocketAddress; import java.net.SocketTimeoutException; import java.net.UnknownHostException; -import java.net.ConnectException; - import java.io.IOException; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -235,8 +233,11 @@ public Connection(ConnectionId remoteId) throws IOException { this.remoteId = remoteId; this.server = remoteId.getAddress(); if (server.isUnresolved()) { - throw new UnknownHostException("unknown host: " + - remoteId.getAddress().getHostName()); + throw NetUtils.wrapException(remoteId.getAddress().getHostName(), + remoteId.getAddress().getPort(), + null, + 0, + new UnknownHostException()); } this.rpcTimeout = remoteId.getRpcTimeout(); this.maxIdleTime = remoteId.getMaxIdleTime(); @@ -1084,7 +1085,12 @@ public Writable call(Writable param, ConnectionId remoteId) call.error.fillInStackTrace(); throw call.error; } else { // local exception - throw wrapException(remoteId.getAddress(), call.error); + InetSocketAddress address = remoteId.getAddress(); + throw NetUtils.wrapException(address.getHostName(), + address.getPort(), + NetUtils.getHostname(), + 0, + call.error); } } else { return call.value; @@ -1093,37 +1099,6 @@ public Writable call(Writable param, ConnectionId remoteId) } /** - * Take an IOException and the address we were trying to connect to - * and return an IOException with the input exception as the cause. - * The new exception provides the stack trace of the place where - * the exception is thrown and some extra diagnostics information. - * If the exception is ConnectException or SocketTimeoutException, - * return a new one of the same type; Otherwise return an IOException. 
- * - * @param addr target address - * @param exception the relevant exception - * @return an exception to throw - */ - private IOException wrapException(InetSocketAddress addr, - IOException exception) { - if (exception instanceof ConnectException) { - //connection refused; include the host:port in the error - return (ConnectException)new ConnectException( - "Call to " + addr + " failed on connection exception: " + exception) - .initCause(exception); - } else if (exception instanceof SocketTimeoutException) { - return (SocketTimeoutException)new SocketTimeoutException( - "Call to " + addr + " failed on socket timeout exception: " - + exception).initCause(exception); - } else { - return (IOException)new IOException( - "Call to " + addr + " failed on local exception: " + exception) - .initCause(exception); - - } - } - - /** * @deprecated Use {@link #call(Writable[], InetSocketAddress[], * Class, UserGroupInformation, Configuration)} instead */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java index beb8cc1cfa..5256e5e935 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java @@ -605,7 +605,7 @@ protected Server(String bindAddress, int port, * @param protocolImpl - the impl of the protocol that will be called * @return the server (for convenience) */ - public + public Server addProtocol(Class protocolClass, IMPL protocolImpl ) throws IOException { throw new IOException("addProtocol Not Implemented"); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 0bfc5722f4..fa04120c5a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -51,8 +51,6 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import javax.security.sasl.Sasl; import javax.security.sasl.SaslException; @@ -70,6 +68,7 @@ import org.apache.hadoop.ipc.RPC.VersionMismatch; import org.apache.hadoop.ipc.metrics.RpcDetailedMetrics; import org.apache.hadoop.ipc.metrics.RpcMetrics; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.SaslRpcServer; import org.apache.hadoop.security.SaslRpcServer.AuthMethod; @@ -227,20 +226,11 @@ public static void bind(ServerSocket socket, InetSocketAddress address, int backlog) throws IOException { try { socket.bind(address, backlog); - } catch (BindException e) { - BindException bindException = new BindException("Problem binding to " + address - + " : " + e.getMessage()); - bindException.initCause(e); - throw bindException; } catch (SocketException e) { - // If they try to bind to a different host's address, give a better - // error message. 
- if ("Unresolved address".equals(e.getMessage())) { - throw new UnknownHostException("Invalid hostname for server: " + - address.getHostName()); - } else { - throw e; - } + throw NetUtils.wrapException(null, + 0, + address.getHostName(), + address.getPort(), e); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java index 75b8d51f05..314bfac582 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java @@ -388,8 +388,9 @@ private void registerProtocolAndImpl(Class protocolClass, } protocolImplMap.put(new ProtoNameVer(protocolName, version), new ProtoClassProtoImpl(protocolClass, protocolImpl)); - LOG.info("ProtocolImpl=" + protocolImpl.getClass().getName() + - " protocolClass=" + protocolClass.getName() + " version=" + version); + LOG.info("Protocol Name = " + protocolName + " version=" + version + + " ProtocolImpl=" + protocolImpl.getClass().getName() + + " protocolClass=" + protocolClass.getName()); } private static class VerProtocolImpl { @@ -555,7 +556,7 @@ public Server(Class protocolClass, Object protocolImpl, @Override - public Server + public Server addProtocol( Class protocolClass, IMPL protocolImpl) throws IOException { registerProtocolAndImpl(protocolClass, protocolImpl); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java index 2c8f797214..c886d0043b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java @@ -117,13 +117,15 @@ public class JMXJsonServlet extends HttpServlet { private static final long serialVersionUID = 1L; - // ----------------------------------------------------- Instance Variables /** * MBean server. */ - protected transient MBeanServer mBeanServer = null; + protected transient MBeanServer mBeanServer; - // --------------------------------------------------------- Public Methods + /** + * Json Factory to create Json generators for write objects in json format + */ + protected transient JsonFactory jsonFactory; /** * Initialize this servlet. 
*/ @@ -131,6 +133,7 @@ public class JMXJsonServlet extends HttpServlet { public void init() throws ServletException { // Retrieve the MBean server mBeanServer = ManagementFactory.getPlatformMBeanServer(); + jsonFactory = new JsonFactory(); } /** @@ -149,53 +152,45 @@ public void doGet(HttpServletRequest request, HttpServletResponse response) { response)) { return; } + JsonGenerator jg = null; + try { + response.setContentType("application/json; charset=utf8"); - response.setContentType("application/json; charset=utf8"); + PrintWriter writer = response.getWriter(); + jg = jsonFactory.createJsonGenerator(writer); + jg.useDefaultPrettyPrinter(); + jg.writeStartObject(); - PrintWriter writer = response.getWriter(); - - JsonFactory jsonFactory = new JsonFactory(); - JsonGenerator jg = jsonFactory.createJsonGenerator(writer); - jg.useDefaultPrettyPrinter(); - jg.writeStartObject(); - if (mBeanServer == null) { - jg.writeStringField("result", "ERROR"); - jg.writeStringField("message", "No MBeanServer could be found"); - jg.close(); - LOG.error("No MBeanServer could be found."); - response.setStatus(HttpServletResponse.SC_NOT_FOUND); - return; - } - - // query per mbean attribute - String getmethod = request.getParameter("get"); - if (getmethod != null) { - String[] splitStrings = getmethod.split("\\:\\:"); - if (splitStrings.length != 2) { - jg.writeStringField("result", "ERROR"); - jg.writeStringField("message", "query format is not as expected."); - jg.close(); - response.setStatus(HttpServletResponse.SC_BAD_REQUEST); + // query per mbean attribute + String getmethod = request.getParameter("get"); + if (getmethod != null) { + String[] splitStrings = getmethod.split("\\:\\:"); + if (splitStrings.length != 2) { + jg.writeStringField("result", "ERROR"); + jg.writeStringField("message", "query format is not as expected."); + response.setStatus(HttpServletResponse.SC_BAD_REQUEST); + return; + } + listBeans(jg, new ObjectName(splitStrings[0]), splitStrings[1], + response); return; } - listBeans(jg, new ObjectName(splitStrings[0]), splitStrings[1], - response); - jg.close(); - return; - } - // query per mbean - String qry = request.getParameter("qry"); - if (qry == null) { - qry = "*:*"; + // query per mbean + String qry = request.getParameter("qry"); + if (qry == null) { + qry = "*:*"; + } + listBeans(jg, new ObjectName(qry), null, response); + } finally { + if (jg != null) { + jg.close(); + } } - listBeans(jg, new ObjectName(qry), null, response); - jg.close(); - - } catch ( IOException e ) { + } catch (IOException e) { LOG.error("Caught an exception while processing JMX request", e); response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - } catch ( MalformedObjectNameException e ) { + } catch (MalformedObjectNameException e) { LOG.error("Caught an exception while processing JMX request", e); response.setStatus(HttpServletResponse.SC_BAD_REQUEST); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java index d94b69f183..5f35b85b79 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java @@ -20,12 +20,15 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.BindException; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.NetworkInterface; +import 
java.net.NoRouteToHostException; import java.net.Socket; import java.net.SocketAddress; import java.net.SocketException; +import java.net.SocketTimeoutException; import java.net.URI; import java.net.URISyntaxException; import java.net.UnknownHostException; @@ -54,6 +57,13 @@ public class NetUtils { private static Map hostToResolved = new HashMap(); + /** text to point users elsewhere: {@value} */ + private static final String FOR_MORE_DETAILS_SEE + = " For more details see: "; + /** text included in wrapped exceptions if the host is null: {@value} */ + public static final String UNKNOWN_HOST = "(unknown)"; + /** Base URL of the Hadoop Wiki: {@value} */ + public static final String HADOOP_WIKI = "http://wiki.apache.org/hadoop/"; /** * Get the socket factory for the given class according to its @@ -537,4 +547,119 @@ public static boolean isLocalAddress(InetAddress addr) { } return local; } + + /** + * Take an IOException , the local host port and remote host port details and + * return an IOException with the input exception as the cause and also + * include the host details. The new exception provides the stack trace of the + * place where the exception is thrown and some extra diagnostics information. + * If the exception is BindException or ConnectException or + * UnknownHostException or SocketTimeoutException, return a new one of the + * same type; Otherwise return an IOException. + * + * @param destHost target host (nullable) + * @param destPort target port + * @param localHost local host (nullable) + * @param localPort local port + * @param exception the caught exception. + * @return an exception to throw + */ + public static IOException wrapException(final String destHost, + final int destPort, + final String localHost, + final int localPort, + final IOException exception) { + if (exception instanceof BindException) { + return new BindException( + "Problem binding to [" + + localHost + + ":" + + localPort + + "] " + + exception + + ";" + + see("BindException")); + } else if (exception instanceof ConnectException) { + // connection refused; include the host:port in the error + return (ConnectException) new ConnectException( + "Call From " + + localHost + + " to " + + destHost + + ":" + + destPort + + " failed on connection exception: " + + exception + + ";" + + see("ConnectionRefused")) + .initCause(exception); + } else if (exception instanceof UnknownHostException) { + return (UnknownHostException) new UnknownHostException( + "Invalid host name: " + + getHostDetailsAsString(destHost, destPort, localHost) + + exception + + ";" + + see("UnknownHost")) + .initCause(exception); + } else if (exception instanceof SocketTimeoutException) { + return (SocketTimeoutException) new SocketTimeoutException( + "Call From " + + localHost + " to " + destHost + ":" + destPort + + " failed on socket timeout exception: " + exception + + ";" + + see("SocketTimeout")) + .initCause(exception); + } else if (exception instanceof NoRouteToHostException) { + return (NoRouteToHostException) new NoRouteToHostException( + "No Route to Host from " + + localHost + " to " + destHost + ":" + destPort + + " failed on socket timeout exception: " + exception + + ";" + + see("NoRouteToHost")) + .initCause(exception); + } + else { + return (IOException) new IOException("Failed on local exception: " + + exception + + "; Host Details : " + + getHostDetailsAsString(destHost, destPort, localHost)) + .initCause(exception); + + } + } + + private static String see(final String entry) { + return FOR_MORE_DETAILS_SEE + HADOOP_WIKI 
+ entry; + } + + /** + * Get the host details as a string + * @param destHost destinatioon host (nullable) + * @param destPort destination port + * @param localHost local host (nullable) + * @return a string describing the destination host:port and the local host + */ + private static String getHostDetailsAsString(final String destHost, + final int destPort, + final String localHost) { + StringBuilder hostDetails = new StringBuilder(27); + hostDetails.append("local host is: ") + .append(quoteHost(localHost)) + .append("; "); + hostDetails.append("destination host is: \"").append(quoteHost(destHost)) + .append(":") + .append(destPort).append("; "); + return hostDetails.toString(); + } + + /** + * Quote a hostname if it is not null + * @param hostname the hostname; nullable + * @return a quoted hostname or {@link #UNKNOWN_HOST} if the hostname is null + */ + private static String quoteHost(final String hostname) { + return (hostname != null) ? + ("\"" + hostname + "\"") + : UNKNOWN_HOST; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/packages/deb/hadoop.control/preinst b/hadoop-common-project/hadoop-common/src/main/packages/deb/hadoop.control/preinst index 23a33fc9c3..cde980fe3f 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/deb/hadoop.control/preinst +++ b/hadoop-common-project/hadoop-common/src/main/packages/deb/hadoop.control/preinst @@ -15,4 +15,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -r hadoop +getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -g 123 -r hadoop diff --git a/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-applications.sh b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-applications.sh new file mode 100644 index 0000000000..521c1599c3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-applications.sh @@ -0,0 +1,140 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +this="${BASH_SOURCE-$0}" +bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P) +script="$(basename -- "$this")" +this="$bin/$script" + +. 
"$bin"/../libexec/hadoop-config.sh + +usage() { + echo " +usage: $0 + Require parameter: + --config /etc/hadoop Location of Hadoop configuration file + --apps= Apps you want to setup on hdfs + If user is not specified, app name + will be used as the user name as well + Optional parameters: + -h Display this message + --kerberos-realm=KERBEROS.EXAMPLE.COM Set Kerberos realm + --super-user=hdfs Set super user id + --super-user-keytab=/etc/security/keytabs/hdfs.keytab Set super user keytab location + " + exit 1 +} + +OPTS=$(getopt \ + -n $0 \ + -o '' \ + -l 'kerberos-realm:' \ + -l 'super-user:' \ + -l 'super-user-keytab:' \ + -l 'apps:' \ + -o 'h' \ + -- "$@") + +if [ $? != 0 ] ; then + usage + exit 1 +fi + +function setup_apps +{ + if [ -z $APPS ] + then + usage + break + fi + + #if super user is not set default to hdfs + HADOOP_HDFS_USER=${HADOOP_HDFS_USER:-hdfs} + + if [ ! "${KERBEROS_REALM}" = "" ]; then + # locate kinit cmd + if [ -e /etc/lsb-release ]; then + KINIT_CMD="/usr/bin/kinit -kt ${HDFS_USER_KEYTAB} ${HADOOP_HDFS_USER}" + else + KINIT_CMD="/usr/kerberos/bin/kinit -kt ${HDFS_USER_KEYTAB} ${HADOOP_HDFS_USER}" + fi + su -c "${KINIT_CMD}" ${HADOOP_HDFS_USER} + fi + #process each app + oldIFS=$IFS + IFS=',' + for app in $APPS + do + IFS=":" + arr=($app) + app=${arr[0]} + user=${arr[1]} + IFS=',' + #if user is empty, default it to app + if [ -z $user ] + then + user=$app + fi + + path="/apps/${app}" + + #create the dir + cmd="su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir ${path}' ${HADOOP_HDFS_USER}" + echo $cmd + eval $cmd + + #make owner to be the app + cmd="su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chown ${user} ${path}' ${HADOOP_HDFS_USER}" + echo $cmd + eval $cmd + + if [ "$?" == "0" ]; then + echo "App directory has been setup: ${path}" + fi + done + IFS=$oldIFS +} + +eval set -- "${OPTS}" +while true; do + case "$1" in + --apps) + APPS=$2; shift 2 + ;; + --kerberos-realm) + KERBEROS_REALM=$2; shift 2 + ;; + --super-user) + HADOOP_HDFS_USER=$2; shift 2 + ;; + --super-user-keytab) + HDFS_USER_KEYTAB=$2; shift 2 + ;; + -h) + usage + ;; + --) + shift ; break + ;; + *) + echo "Unknown option: $1" + usage + exit 1 + ;; + esac +done + +setup_apps diff --git a/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh index 96a989fc39..8d54b2e41d 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh +++ b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh @@ -51,6 +51,10 @@ usage: $0 --taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler Set task scheduler --datanodes=hostname1,hostname2,... SET the datanodes --tasktrackers=hostname1,hostname2,... SET the tasktrackers + --dfs-webhdfs-enabled=false|true Enable webhdfs + --dfs-support-append=false|true Enable append + --hadoop-proxy-users='user1:groups:hosts;user2:groups:hosts' Setup proxy users for hadoop + --hbase-user=hbase User which hbase is running as. 
Defaults to hbase " exit 1 } @@ -60,9 +64,11 @@ check_permission() { OWNER="0" RESULT=0 while [ "$TARGET" != "/" ]; do - PARENT=`dirname $TARGET` - NAME=`basename $TARGET` - OWNER=`ls -ln $PARENT | grep $NAME| awk '{print $3}'` + if [ "`uname`" = "Darwin" ]; then + OWNER=`stat -f %u $TARGET` + else + OWNER=`stat -c %u $TARGET` + fi if [ "$OWNER" != "0" ]; then RESULT=1 break @@ -74,6 +80,9 @@ check_permission() { template_generator() { REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})' + if [ -e $2 ]; then + mv -f $2 "$2.bak" + fi cat $1 | while read line ; do while [[ "$line" =~ $REGEX ]] ; do @@ -85,6 +94,78 @@ template_generator() { done } +######################################### +# Function to modify a value of a field in an xml file +# Params: $1 is the file with full path; $2 is the property, $3 is the new value +######################################### +function addPropertyToXMLConf +{ + #read the file name with full path + local file=$1 + #get the property name + local property=$2 + #get what value should be set for that + local propValue=$3 + #get the description + local desc=$4 + #get the value for the final tag + local finalVal=$5 + + #create the property text, make sure the / are escaped + propText="\n$property<\/name>\n$propValue<\/value>" + #if description is not empty add it + if [ ! -z $desc ] + then + propText="${propText}$desc<\/description>\n" + fi + + #if final is not empty add it + if [ ! -z $finalVal ] + then + propText="${propText}final>$finalVal<\/final>\n" + fi + + #add the ending tag + propText="${propText}<\/property>\n" + + #add the property to the file + endText="<\/configuration>" + #add the text using sed at the end of the file + sed -i "s|$endText|$propText$endText|" $file +} + +########################################## +# Function to setup up the proxy user settings +######################################### +function setupProxyUsers +{ + #if hadoop proxy users are sent, setup hadoop proxy + if [ ! 
-z $HADOOP_PROXY_USERS ] + then + oldIFS=$IFS + IFS=';' + #process each proxy config + for proxy in $HADOOP_PROXY_USERS + do + #get the user, group and hosts information for each proxy + IFS=':' + arr=($proxy) + user="${arr[0]}" + groups="${arr[1]}" + hosts="${arr[2]}" + #determine the property names and values + proxy_groups_property="hadoop.proxyuser.${user}.groups" + proxy_groups_val="$groups" + addPropertyToXMLConf "${HADOOP_CONF_DIR}/hdfs-site.xml" "$proxy_groups_property" "$proxy_groups_val" + proxy_hosts_property="hadoop.proxyuser.${user}.hosts" + proxy_hosts_val="$hosts" + addPropertyToXMLConf "${HADOOP_CONF_DIR}/hdfs-site.xml" "$proxy_hosts_property" "$proxy_hosts_val" + IFS=';' + done + IFS=$oldIFS + fi +} + OPTS=$(getopt \ -n $0 \ -o '' \ @@ -113,6 +194,10 @@ OPTS=$(getopt \ -l 'kinit-location:' \ -l 'datanodes:' \ -l 'tasktrackers:' \ + -l 'dfs-webhdfs-enabled:' \ + -l 'hadoop-proxy-users:' \ + -l 'dfs-support-append:' \ + -l 'hbase-user:' \ -o 'h' \ -- "$@") @@ -232,6 +317,22 @@ while true ; do AUTOMATED=1 TASKTRACKERS=$(echo $TASKTRACKERS | tr ',' ' ') ;; + --dfs-webhdfs-enabled) + DFS_WEBHDFS_ENABLED=$2; shift 2 + AUTOMATED=1 + ;; + --hadoop-proxy-users) + HADOOP_PROXY_USERS=$2; shift 2 + AUTOMATED=1 + ;; + --dfs-support-append) + DFS_SUPPORT_APPEND=$2; shift 2 + AUTOMATED=1 + ;; + --hbase-user) + HBASE_USER=$2; shift 2 + AUTOMATED=1 + ;; --) shift ; break ;; @@ -247,6 +348,7 @@ AUTOSETUP=${AUTOSETUP:-1} JAVA_HOME=${JAVA_HOME:-/usr/java/default} HADOOP_GROUP=${HADOOP_GROUP:-hadoop} HADOOP_NN_HOST=${HADOOP_NN_HOST:-`hostname`} +HADOOP_SNN_HOST=${HADOOP_SNN_HOST:-`hostname`} HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode} HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode} HADOOP_JT_HOST=${HADOOP_JT_HOST:-`hostname`} @@ -259,9 +361,14 @@ HADOOP_REPLICATION=${HADOOP_RELICATION:-3} HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler} HADOOP_HDFS_USER=${HADOOP_HDFS_USER:-hdfs} HADOOP_MR_USER=${HADOOP_MR_USER:-mr} +DFS_WEBHDFS_ENABLED=${DFS_WEBHDFS_ENABLED:-false} +DFS_SUPPORT_APPEND=${DFS_SUPPORT_APPEND:-false} +HBASE_USER=${HBASE_USER:-hbase} KEYTAB_DIR=${KEYTAB_DIR:-/etc/security/keytabs} HDFS_KEYTAB=${HDFS_KEYTAB:-/home/hdfs/hdfs.keytab} MR_KEYTAB=${MR_KEYTAB:-/home/mr/mr.keytab} +DFS_WEBHDFS_ENABLED=${DFS_WEBHDFS_ENABLED:-false} +DFS_SUPPORT_APPEND=${DFS_SUPPORT_APPEND:-false} KERBEROS_REALM=${KERBEROS_REALM:-KERBEROS.EXAMPLE.COM} SECURITY_TYPE=${SECURITY_TYPE:-simple} KINIT=${KINIT:-/usr/kerberos/bin/kinit} @@ -270,13 +377,18 @@ if [ "${SECURITY_TYPE}" = "kerberos" ]; then HADOOP_DN_ADDR="0.0.0.0:1019" HADOOP_DN_HTTP_ADDR="0.0.0.0:1022" SECURITY="true" + HADOOP_SECURE_DN_USER=${HADOOP_HDFS_USER} else TASK_CONTROLLER="org.apache.hadoop.mapred.DefaultTaskController" - HADDOP_DN_ADDR="0.0.0.0:50010" + HADOOP_DN_ADDR="0.0.0.0:50010" HADOOP_DN_HTTP_ADDR="0.0.0.0:50075" SECURITY="false" + HADOOP_SECURE_DN_USER="" fi +#unset env vars +unset HADOOP_CLIENT_OPTS HADOOP_NAMENODE_OPTS HADOOP_JOBTRACKER_OPTS HADOOP_TASKTRACKER_OPTS HADOOP_DATANODE_OPTS HADOOP_SECONDARYNAMENODE_OPTS HADOOP_JAVA_PLATFORM_OPTS + if [ "${AUTOMATED}" != "1" ]; then echo "Setup Hadoop Configuration" echo @@ -383,46 +495,6 @@ if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then chmod 755 ${HADOOP_LOG_DIR}/${HADOOP_MR_USER} chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}/${HADOOP_MR_USER} - if [ -e ${HADOOP_CONF_DIR}/core-site.xml ]; then - mv -f ${HADOOP_CONF_DIR}/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml.bak - fi - if [ -e 
${HADOOP_CONF_DIR}/hdfs-site.xml ]; then - mv -f ${HADOOP_CONF_DIR}/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml.bak - fi - if [ -e ${HADOOP_CONF_DIR}/mapred-site.xml ]; then - mv -f ${HADOOP_CONF_DIR}/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml.bak - fi - if [ -e ${HADOOP_CONF_DIR}/hadoop-env.sh ]; then - mv -f ${HADOOP_CONF_DIR}/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh.bak - fi - if [ -e ${HADOOP_CONF_DIR}/hadoop-policy.xml ]; then - mv -f ${HADOOP_CONF_DIR}/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml.bak - fi - if [ -e ${HADOOP_CONF_DIR}/mapred-queue-acls.xml ]; then - mv -f ${HADOOP_CONF_DIR}/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml.bak - fi - if [ -e ${HADOOP_CONF_DIR}/commons-logging.properties ]; then - mv -f ${HADOOP_CONF_DIR}/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties.bak - fi - if [ -e ${HADOOP_CONF_DIR}/taskcontroller.cfg ]; then - mv -f ${HADOOP_CONF_DIR}/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg.bak - fi - if [ -e ${HADOOP_CONF_DIR}/slaves ]; then - mv -f ${HADOOP_CONF_DIR}/slaves ${HADOOP_CONF_DIR}/slaves.bak - fi - if [ -e ${HADOOP_CONF_DIR}/dfs.include ]; then - mv -f ${HADOOP_CONF_DIR}/dfs.include ${HADOOP_CONF_DIR}/dfs.include.bak - fi - if [ -e ${HADOOP_CONF_DIR}/dfs.exclude ]; then - mv -f ${HADOOP_CONF_DIR}/dfs.exclude ${HADOOP_CONF_DIR}/dfs.exclude.bak - fi - if [ -e ${HADOOP_CONF_DIR}/mapred.include ]; then - mv -f ${HADOOP_CONF_DIR}/mapred.include ${HADOOP_CONF_DIR}/mapred.include.bak - fi - if [ -e ${HADOOP_CONF_DIR}/mapred.exclude ]; then - mv -f ${HADOOP_CONF_DIR}/mapred.exclude ${HADOOP_CONF_DIR}/mapred.exclude.bak - fi - template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml @@ -431,7 +503,13 @@ if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg + template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml + template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties + template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties + #setup up the proxy users + setupProxyUsers + #set the owner of the hadoop dir to root chown root ${HADOOP_PREFIX} chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh @@ -474,15 +552,12 @@ else template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties - if [ ! 
-e ${HADOOP_CONF_DIR}/capacity-scheduler.xml ]; then - template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml - fi - if [ ! -e ${HADOOP_CONF_DIR}/hadoop-metrics2.properties ]; then - cp ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties - fi - if [ ! -e ${HADOOP_CONF_DIR}/log4j.properties ]; then - cp ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties - fi + template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml + template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties + template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties + + #setup up the proxy users + setupProxyUsers chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh @@ -515,6 +590,12 @@ else echo "${HADOOP_CONF_DIR}/hdfs-site.xml" echo "${HADOOP_CONF_DIR}/mapred-site.xml" echo "${HADOOP_CONF_DIR}/hadoop-env.sh" + echo "${HADOOP_CONF_DIR}/hadoop-policy.xml" + echo "${HADOOP_CONF_DIR}/commons-logging.properties" + echo "${HADOOP_CONF_DIR}/taskcontroller.cfg" + echo "${HADOOP_CONF_DIR}/capacity-scheduler.xml" + echo "${HADOOP_CONF_DIR}/log4j.properties" + echo "${HADOOP_CONF_DIR}/hadoop-metrics2.properties" echo echo " to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on namenode." fi diff --git a/hadoop-common-project/hadoop-common/src/main/packages/rpm/spec/hadoop.spec b/hadoop-common-project/hadoop-common/src/main/packages/rpm/spec/hadoop.spec index 71c6d27de7..bfff1612d8 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/rpm/spec/hadoop.spec +++ b/hadoop-common-project/hadoop-common/src/main/packages/rpm/spec/hadoop.spec @@ -132,7 +132,7 @@ mv ${RPM_BUILD_DIR}/%{_final_name}/share/* ${RPM_BUILD_DIR}%{_share_dir} rm -rf ${RPM_BUILD_DIR}/%{_final_name}/etc %pre -getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -r hadoop +getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -g 123 -r hadoop %post bash ${RPM_INSTALL_PREFIX0}/sbin/update-hadoop-env.sh \ diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/core-site.xml b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/core-site.xml index d2f040c535..9fac2258d1 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/core-site.xml +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/core-site.xml @@ -58,6 +58,8 @@ RULE:[2:$1@$0](hdfs@.*${KERBEROS_REALM})s/.*/${HADOOP_HDFS_USER}/ RULE:[2:$1@$0](mapredqa@.*${KERBEROS_REALM})s/.*/${HADOOP_MR_USER}/ RULE:[2:$1@$0](hdfsqa@.*${KERBEROS_REALM})s/.*/${HADOOP_HDFS_USER}/ + RULE:[2:$1@$0](hm@.*${KERBEROS_REALM})s/.*/${HBASE_USER}/ + RULE:[2:$1@$0](rs@.*${KERBEROS_REALM})s/.*/${HBASE_USER}/ DEFAULT diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-env.sh index e14ca285d2..c981877dd3 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-env.sh @@ -41,22 
+41,22 @@ done #export HADOOP_NAMENODE_INIT_HEAPSIZE="" # Extra Java runtime options. Empty by default. -export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true $HADOOP_OPTS" +export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true $HADOOP_CLIENT_OPTS" # Command specific options appended to HADOOP_OPTS when specified -export HADOOP_NAMENODE_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_NAMENODE_OPTS}" -HADOOP_JOBTRACKER_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dmapred.jobsummary.logger=INFO,JSA ${HADOOP_JOBTRACKER_OPTS}" -HADOOP_TASKTRACKER_OPTS="-Dsecurity.audit.logger=ERROR,console -Dmapred.audit.logger=ERROR,console ${HADOOP_TASKTRACKER_OPTS}" -HADOOP_DATANODE_OPTS="-Dsecurity.audit.logger=ERROR,DRFAS ${HADOOP_DATANODE_OPTS}" +export HADOOP_NAMENODE_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT $HADOOP_NAMENODE_OPTS" +HADOOP_JOBTRACKER_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dmapred.jobsummary.logger=INFO,JSA $HADOOP_JOBTRACKER_OPTS" +HADOOP_TASKTRACKER_OPTS="-Dsecurity.audit.logger=ERROR,console -Dmapred.audit.logger=ERROR,console $HADOOP_TASKTRACKER_OPTS" +HADOOP_DATANODE_OPTS="-Dsecurity.audit.logger=ERROR,DRFAS $HADOOP_DATANODE_OPTS" -export HADOOP_SECONDARYNAMENODE_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_SECONDARYNAMENODE_OPTS}" +export HADOOP_SECONDARYNAMENODE_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT $HADOOP_SECONDARYNAMENODE_OPTS" # The following applies to multiple commands (fs, dfs, fsck, distcp etc) -export HADOOP_CLIENT_OPTS="-Xmx128m ${HADOOP_CLIENT_OPTS}" -#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData ${HADOOP_JAVA_PLATFORM_OPTS}" +export HADOOP_CLIENT_OPTS="-Xmx128m $HADOOP_CLIENT_OPTS" +#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS" # On secure datanodes, user to run the datanode as after dropping privileges -export HADOOP_SECURE_DN_USER=${HADOOP_HDFS_USER} +export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER} # Where log files are stored. $HADOOP_HOME/logs by default. export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml index ffec60355a..e1bb595874 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml @@ -263,4 +263,14 @@ excluded. + + dfs.webhdfs.enabled + ${DFS_WEBHDFS_ENABLED} + Enable or disable webhdfs. Defaults to false + + + dfs.support.append + ${DFS_SUPPORT_APPEND} + Enable or disable append. Defaults to false + diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties index 16c6aa6890..9ab56e499b 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties @@ -21,15 +21,6 @@ hadoop.root.logger=INFO,console hadoop.log.dir=. 
hadoop.log.file=hadoop.log -# -# Job Summary Appender -# -# Use following logger to send summary to separate file defined by -# hadoop.mapreduce.jobsummary.log.file rolled daily: -# hadoop.mapreduce.jobsummary.logger=INFO,JSA -# -hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} -hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log # Define the root logger to the system property "hadoop.root.logger". log4j.rootLogger=${hadoop.root.logger}, EventCounter @@ -90,19 +81,21 @@ log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n # #Security appender # +security.audit.logger=INFO,console hadoop.security.log.file=SecurityAuth.audit log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} - log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd + #new logger # Define some default values that can be overridden by system properties hadoop.security.logger=INFO,console -log4j.category.SecurityLogger=${hadoop.security.logger} +# # hdfs audit logging - +# hdfs.audit.logger=INFO,console log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false @@ -112,8 +105,9 @@ log4j.appender.DRFAAUDIT.layout=org.apache.log4j.PatternLayout log4j.appender.DRFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n log4j.appender.DRFAAUDIT.DatePattern=.yyyy-MM-dd +# # mapred audit logging - +# mapred.audit.logger=INFO,console log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false @@ -138,11 +132,6 @@ log4j.appender.MRAUDIT.DatePattern=.yyyy-MM-dd #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n -# -# FSNamesystem Audit logging -# All audit events are logged at INFO level -# -log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN # Custom Logging levels @@ -160,8 +149,14 @@ log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter # -# Job Summary Appender +# Job Summary Appender # +# Use following logger to send summary to separate file defined by +# hadoop.mapreduce.jobsummary.log.file rolled daily: +# hadoop.mapreduce.jobsummary.logger=INFO,JSA +# +hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} +hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log log4j.appender.JSA=org.apache.log4j.DailyRollingFileAppender log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} log4j.appender.JSA.layout=org.apache.log4j.PatternLayout @@ -170,26 +165,6 @@ log4j.appender.JSA.DatePattern=.yyyy-MM-dd log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false -# -# MapReduce Audit Log Appender -# - -# Set the MapReduce audit log filename -#hadoop.mapreduce.audit.log.file=hadoop-mapreduce.audit.log - -# Appender for AuditLogger. 
-# Requires the following system properties to be set -# - hadoop.log.dir (Hadoop Log directory) -# - hadoop.mapreduce.audit.log.file (MapReduce audit log filename) - -#log4j.logger.org.apache.hadoop.mapred.AuditLogger=INFO,MRAUDIT -#log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false -#log4j.appender.MRAUDIT=org.apache.log4j.DailyRollingFileAppender -#log4j.appender.MRAUDIT.File=${hadoop.log.dir}/${hadoop.mapreduce.audit.log.file} -#log4j.appender.MRAUDIT.DatePattern=.yyyy-MM-dd -#log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout -#log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n - # # Yarn ResourceManager Application Summary Log # diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 5842db199d..f9f14fb848 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -58,7 +58,7 @@ protected void tearDown() throws Exception { } private void startConfig() throws IOException{ - out.write("\n"); + out.write("\n"); out.write("\n"); } @@ -221,18 +221,6 @@ public void testCommentsInValue() throws IOException { assertEquals("this contains a comment", conf.get("my.comment")); } - public void testControlAInValue() throws IOException { - out = new BufferedWriter(new FileWriter(CONFIG)); - startConfig(); - appendProperty("my.char", ""); - appendProperty("my.string", "somestring"); - endConfig(); - Path fileResource = new Path(CONFIG); - conf.addResource(fileResource); - assertEquals("\u0001", conf.get("my.char")); - assertEquals("some\u0001string", conf.get("my.string")); - } - public void testTrim() throws IOException { out=new BufferedWriter(new FileWriter(CONFIG)); startConfig(); @@ -310,7 +298,7 @@ public void testWriteXml() throws IOException { conf.writeXml(baos); String result = baos.toString(); assertTrue("Result has proper header", result.startsWith( - "")); + "")); assertTrue("Result has proper footer", result.endsWith("")); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServerLifecycle.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServerLifecycle.java index a205bf8519..27dd67f39c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServerLifecycle.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServerLifecycle.java @@ -56,16 +56,14 @@ private void assertNotLive(HttpServer server) { * * @throws Throwable on failure */ - @Test public void testStartedServerIsAlive() throws Throwable { + @Test + public void testStartedServerIsAlive() throws Throwable { HttpServer server = null; - try { - server = createTestServer(); - assertNotLive(server); - server.start(); - assertAlive(server); - } finally { - stop(server); - } + server = createTestServer(); + assertNotLive(server); + server.start(); + assertAlive(server); + stop(server); } /** @@ -105,4 +103,24 @@ private void assertToStringContains(HttpServer server, String text) { assertNotLive(server); } + /** + * Test that the server is alive once started + * + * @throws Throwable + * on failure + */ + @Test + public void testWepAppContextAfterServerStop() throws Throwable { + HttpServer server = null; + String key = 
"test.attribute.key"; + String value = "test.attribute.value"; + server = createTestServer(); + assertNotLive(server); + server.start(); + server.setAttribute(key, value); + assertAlive(server); + assertEquals(value, server.getAttribute(key)); + stop(server); + assertNull("Server context should have cleared", server.getAttribute(key)); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java index 295bf13d11..ba6828a9b8 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java @@ -20,6 +20,7 @@ import static org.junit.Assert.*; import java.io.IOException; +import java.util.concurrent.CountDownLatch; import org.apache.hadoop.io.retry.UnreliableImplementation.TypeOfExceptionToFailWith; import org.apache.hadoop.io.retry.UnreliableInterface.UnreliableException; @@ -35,22 +36,41 @@ public static class FlipFlopProxyProvider implements FailoverProxyProvider { private Object impl1; private Object impl2; + private boolean latchEnabled = false; + private CountDownLatch getProxyLatch; + private int failoversOccurred = 0; + public FlipFlopProxyProvider(Class iface, Object activeImpl, - Object standbyImpl) { + Object standbyImpl, int getProxyCountDown) { this.iface = iface; this.impl1 = activeImpl; this.impl2 = standbyImpl; currentlyActive = impl1; + getProxyLatch = new CountDownLatch(getProxyCountDown); + } + + public FlipFlopProxyProvider(Class iface, Object activeImpl, + Object standbyImpl) { + this(iface, activeImpl, standbyImpl, 0); } @Override public Object getProxy() { + if (latchEnabled) { + getProxyLatch.countDown(); + try { + getProxyLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } return currentlyActive; } @Override - public void performFailover(Object currentProxy) { + public synchronized void performFailover(Object currentProxy) { currentlyActive = impl1 == currentProxy ? impl2 : impl1; + failoversOccurred++; } @Override @@ -63,6 +83,13 @@ public void close() throws IOException { // Nothing to do. } + public void setLatchEnabled(boolean latchEnabled) { + this.latchEnabled = latchEnabled; + } + + public int getFailoversOccurred() { + return failoversOccurred; + } } public static class FailOverOnceOnAnyExceptionPolicy implements RetryPolicy { @@ -186,4 +213,55 @@ public void testFailoverOnNetworkExceptionIdempotentOperation() // IOException and this method is idempotent. assertEquals("impl2", unreliable.succeedsOnceThenFailsReturningStringIdempotent()); } -} + + private static class ConcurrentMethodThread extends Thread { + + private UnreliableInterface unreliable; + public String result; + + public ConcurrentMethodThread(UnreliableInterface unreliable) { + this.unreliable = unreliable; + } + + public void run() { + try { + result = unreliable.failsIfIdentifierDoesntMatch("impl2"); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + + /** + * Test that concurrent failed method invocations only result in a single + * failover. 
+ */ + @Test + public void testConcurrentMethodFailures() throws InterruptedException { + FlipFlopProxyProvider proxyProvider = new FlipFlopProxyProvider( + UnreliableInterface.class, + new UnreliableImplementation("impl1", + TypeOfExceptionToFailWith.STANDBY_EXCEPTION), + new UnreliableImplementation("impl2", + TypeOfExceptionToFailWith.STANDBY_EXCEPTION), + 2); + + final UnreliableInterface unreliable = (UnreliableInterface)RetryProxy + .create(UnreliableInterface.class, proxyProvider, + RetryPolicies.failoverOnNetworkException(10)); + + ConcurrentMethodThread t1 = new ConcurrentMethodThread(unreliable); + ConcurrentMethodThread t2 = new ConcurrentMethodThread(unreliable); + + // Getting a proxy will now wait on a latch. + proxyProvider.setLatchEnabled(true); + + t1.start(); + t2.start(); + t1.join(); + t2.join(); + assertEquals("impl2", t1.result); + assertEquals("impl2", t2.result); + assertEquals(1, proxyProvider.getFailoversOccurred()); + } +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java index 10dc6b3830..7fa88b3b08 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java @@ -141,4 +141,23 @@ public String succeedsOnceThenFailsReturningStringIdempotent() } } + @Override + public String failsIfIdentifierDoesntMatch(String identifier) + throws UnreliableException, StandbyException, IOException { + if (this.identifier.equals(identifier)) { + return identifier; + } else { + switch (exceptionToFailWith) { + case STANDBY_EXCEPTION: + throw new StandbyException(identifier); + case UNRELIABLE_EXCEPTION: + throw new UnreliableException(identifier); + case IO_EXCEPTION: + throw new IOException(identifier); + default: + throw new RuntimeException(identifier); + } + } + } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java index 04e4505001..e794c1686c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java @@ -63,4 +63,8 @@ public String succeedsOnceThenFailsReturningStringIdempotent() throws UnreliableException, StandbyException, IOException; public String succeedsTenTimesThenFailsReturningString() throws UnreliableException, StandbyException, IOException; + + @Idempotent + public String failsIfIdentifierDoesntMatch(String identifier) + throws UnreliableException, StandbyException, IOException; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestAvroRpc.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestAvroRpc.java index 7f7c510ab4..e7b6657a5c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestAvroRpc.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestAvroRpc.java @@ -43,6 +43,7 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; +import 
org.apache.hadoop.fs.CommonConfigurationKeys; /** Unit tests for AvroRpc. */ public class TestAvroRpc extends TestCase { @@ -56,6 +57,9 @@ public class TestAvroRpc extends TestCase { public TestAvroRpc(String name) { super(name); } + public static interface EmptyProtocol {} + public static class EmptyImpl implements EmptyProtocol {} + public static class TestImpl implements AvroTestProtocol { public void ping() {} @@ -93,10 +97,12 @@ private void testReflect(boolean secure) throws Exception { sm = new TestTokenSecretManager(); } UserGroupInformation.setConfiguration(conf); + RPC.setProtocolEngine(conf, EmptyProtocol.class, AvroRpcEngine.class); RPC.setProtocolEngine(conf, AvroTestProtocol.class, AvroRpcEngine.class); - Server server = RPC.getServer(AvroTestProtocol.class, - new TestImpl(), ADDRESS, 0, 5, true, - conf, sm); + RPC.Server server = RPC.getServer(EmptyProtocol.class, new EmptyImpl(), + ADDRESS, 0, 5, true, conf, sm); + server.addProtocol(AvroTestProtocol.class, new TestImpl()); + try { server.start(); InetSocketAddress addr = NetUtils.getConnectAddress(server); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java index 5d04c20023..7c01e2f191 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java @@ -23,7 +23,6 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.net.NetUtils; @@ -270,7 +269,7 @@ public void testStandAloneClient() throws Exception { fail("Expected an exception to have been thrown"); } catch (IOException e) { String message = e.getMessage(); - String addressText = address.toString(); + String addressText = address.getHostName() + ":" + address.getPort(); assertTrue("Did not find "+addressText+" in "+message, message.contains(addressText)); Throwable cause=e.getCause(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java index 7cc6f4d521..d0927b9821 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java @@ -17,10 +17,15 @@ */ package org.apache.hadoop.net; +import junit.framework.AssertionFailedError; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.junit.Test; import static org.junit.Assert.*; +import java.io.IOException; +import java.net.BindException; import java.net.InetAddress; import java.net.NetworkInterface; import java.net.Socket; @@ -34,6 +39,12 @@ public class TestNetUtils { + private static final Log LOG = LogFactory.getLog(TestNetUtils.class); + private static final int DEST_PORT = 4040; + private static final String DEST_PORT_NAME = Integer.toString(DEST_PORT); + private static final int LOCAL_PORT = 8080; + private static final String LOCAL_PORT_NAME = Integer.toString(LOCAL_PORT); + /** * Test that we can't accidentally connect back to the connecting socket due * to a quirk in the TCP spec. 
@@ -120,4 +131,100 @@ public void testIsLocalAddress() throws Exception { } assertFalse(NetUtils.isLocalAddress(InetAddress.getByName("8.8.8.8"))); } + + @Test + public void testWrapConnectException() throws Throwable { + IOException e = new ConnectException("failed"); + IOException wrapped = verifyExceptionClass(e, ConnectException.class); + assertInException(wrapped, "failed"); + assertWikified(wrapped); + assertInException(wrapped, "localhost"); + assertRemoteDetailsIncluded(wrapped); + assertInException(wrapped, "/ConnectionRefused"); + } + + @Test + public void testWrapBindException() throws Throwable { + IOException e = new BindException("failed"); + IOException wrapped = verifyExceptionClass(e, BindException.class); + assertInException(wrapped, "failed"); + assertLocalDetailsIncluded(wrapped); + assertNotInException(wrapped, DEST_PORT_NAME); + assertInException(wrapped, "/BindException"); + } + + @Test + public void testWrapUnknownHostException() throws Throwable { + IOException e = new UnknownHostException("failed"); + IOException wrapped = verifyExceptionClass(e, UnknownHostException.class); + assertInException(wrapped, "failed"); + assertWikified(wrapped); + assertInException(wrapped, "localhost"); + assertRemoteDetailsIncluded(wrapped); + assertInException(wrapped, "/UnknownHost"); + } + + private void assertRemoteDetailsIncluded(IOException wrapped) + throws Throwable { + assertInException(wrapped, "desthost"); + assertInException(wrapped, DEST_PORT_NAME); + } + + private void assertLocalDetailsIncluded(IOException wrapped) + throws Throwable { + assertInException(wrapped, "localhost"); + assertInException(wrapped, LOCAL_PORT_NAME); + } + + private void assertWikified(Exception e) throws Throwable { + assertInException(e, NetUtils.HADOOP_WIKI); + } + + private void assertInException(Exception e, String text) throws Throwable { + String message = extractExceptionMessage(e); + if (!(message.contains(text))) { + throw new AssertionFailedError("Wrong text in message " + + "\"" + message + "\"" + + " expected \"" + text + "\"") + .initCause(e); + } + } + + private String extractExceptionMessage(Exception e) throws Throwable { + assertNotNull("Null Exception", e); + String message = e.getMessage(); + if (message == null) { + throw new AssertionFailedError("Empty text in exception " + e) + .initCause(e); + } + return message; + } + + private void assertNotInException(Exception e, String text) + throws Throwable{ + String message = extractExceptionMessage(e); + if (message.contains(text)) { + throw new AssertionFailedError("Wrong text in message " + + "\"" + message + "\"" + + " did not expect \"" + text + "\"") + .initCause(e); + } + } + + private IOException verifyExceptionClass(IOException e, + Class expectedClass) + throws Throwable { + assertNotNull("Null Exception", e); + IOException wrapped = + NetUtils.wrapException("desthost", DEST_PORT, + "localhost", LOCAL_PORT, + e); + LOG.info(wrapped.toString(), wrapped); + if(!(wrapped.getClass().equals(expectedClass))) { + throw new AssertionFailedError("Wrong exception class; expected " + + expectedClass + + " got " + wrapped.getClass() + ": " + wrapped).initCause(wrapped); + } + return wrapped; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 43c360fcb0..be5f7a3f3c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -19,9 +19,13 @@ Trunk (unreleased changes) HDFS-2340. 
Support getFileBlockLocations and getDelegationToken in webhdfs. (szetszwo) + HDFS-2348. Support getContentSummary and getFileChecksum in webhdfs. + (szetszwo) + IMPROVEMENTS - HADOOP-7524 Change RPC to allow multiple protocols including multuple versions of the same protocol (sanjay Radia) + HADOOP-7524 Change RPC to allow multiple protocols including multuple + versions of the same protocol (sanjay Radia) HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants -> HdfsConstants. (Harsh J Chouraria via atm) @@ -50,6 +54,13 @@ Trunk (unreleased changes) HDFS-2355. Federation: enable using the same configuration file across all the nodes in the cluster. (suresh) + HDFS-2371. Refactor BlockSender.java for better readability. (suresh) + + HDFS-2158. Add JournalSet to manage the set of journals. (jitendra) + + HDFS-2395. Add a root element in the JSON responses of webhdfs. + (szetszwo) + BUG FIXES HDFS-2287. TestParallelRead has a small off-by-one bug. (todd) @@ -83,6 +94,9 @@ Trunk (unreleased changes) HDFS-2361. hftp is broken, fixed username checks in JspHelper. (jitendra) + HDFS-2298. Fix TestDfsOverAvroRpc by changing ClientProtocol to + not include multiple methods of the same name. (cutting) + Release 0.23.0 - Unreleased INCOMPATIBLE CHANGES @@ -1145,6 +1159,39 @@ Release 0.23.0 - Unreleased (todd) HDFS-2027. Image inspector should return finalized logs before unfinalized logs. (todd) + HDFS-2074. Determine edit log validity by truly reading and validating + transactions. (todd) + HDFS-2085. Finalize in-progress edit logs at startup. (todd) + HDFS-2026. SecondaryNameNode should properly handle the case where the + NameNode is reformatted. (todd) + HDFS-2077. Address checkpoint upload when one of the storage dirs is failed + (todd) + HDFS-2078. NameNode should not clear directory when restoring removed + storage. (todd) + HDFS-2088. Move edits log archiving logic into FSEditLog/JournalManager + (todd) + HDFS-2093. Handle case where an entirely empty log is left during NN crash + (todd) + HDFS-2102. Zero-pad edits filename to make them lexically sortable. (Ivan + Kelly via todd) + HDFS-2010. Fix NameNode to exit if all edit streams become inaccessible. + (atm via todd) + HDFS-2123. Checkpoint interval should be based on txn count, not size. + (todd) + HDFS-1979. Fix backupnode for new edits/image layout. (todd) + HDFS-2101. Fix remaining unit tests for new storage filenames. (todd) + HDFS-2133. Address remaining TODOs and pre-merge cleanup on HDFS-1073 + branch. (todd) + HDFS-1780. Reduce need to rewrite FSImage on startup. (todd) + HDFS-2104. Add a flag to the 2NN to format its checkpoint dirs on startup. + (todd) + HDFS-2135. Fix regression of HDFS-1955 in HDFS-1073 branch. (todd) + HDFS-2160. Fix CreateEditsLog test tool in HDFS-1073 branch. (todd) + HDFS-2168. Reenable TestEditLog.testFailedOpen and fix exposed bug. (todd) + HDFS-2169. Clean up TestCheckpoint and remove TODOs (todd) + HDFS-2170. Address remaining TODOs in HDFS-1073 branch. (todd) + HDFS-2172. Address findbugs and javadoc warnings in HDFS-1073 branch. 
+ (todd) Release 0.22.0 - Unreleased diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 0b1595d87e..41fc651074 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -874,13 +874,13 @@ public void concat(String trg, String [] srcs) throws IOException { } /** * Rename file or directory. - * @see ClientProtocol#rename(String, String, Options.Rename...) + * @see ClientProtocol#rename2(String, String, Options.Rename...) */ public void rename(String src, String dst, Options.Rename... options) throws IOException { checkOpen(); try { - namenode.rename(src, dst, options); + namenode.rename2(src, dst, options); } catch(RemoteException re) { throw re.unwrapRemoteException(AccessControlException.class, DSQuotaExceededException.class, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index e2ecbaa46d..e69a2727b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -67,9 +67,9 @@ public interface ClientProtocol extends VersionedProtocol { * Compared to the previous version the following changes have been introduced: * (Only the latest change is reflected. * The log of historical changes can be retrieved from the svn). - * 68: Add Balancer Bandwidth Command protocol + * 69: Eliminate overloaded method names. */ - public static final long versionID = 68L; + public static final long versionID = 69L; /////////////////////////////////////// // File contents @@ -419,27 +419,12 @@ public void concat(String trg, String[] srcs) * dst contains a symlink * @throws IOException If an I/O error occurred */ - public void rename(String src, String dst, Options.Rename... options) + public void rename2(String src, String dst, Options.Rename... options) throws AccessControlException, DSQuotaExceededException, FileAlreadyExistsException, FileNotFoundException, NSQuotaExceededException, ParentNotDirectoryException, SafeModeException, UnresolvedLinkException, IOException; - /** - * Delete the given file or directory from the file system. - *

- * Any blocks belonging to the deleted files will be garbage-collected. - * - * @param src existing name. - * @return true only if the existing file or directory was actually removed - * from the file system. - * @throws UnresolvedLinkException if src contains a symlink. - * @deprecated use {@link #delete(String, boolean)} istead. - */ - @Deprecated - public boolean delete(String src) - throws IOException, UnresolvedLinkException; - /** * Delete the given file or directory from the file system. *

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java index f6dc94d32e..2a53b3dd78 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java @@ -404,7 +404,7 @@ private void verifyBlock(ExtendedBlock block) { adjustThrottler(); blockSender = new BlockSender(block, 0, -1, false, false, true, - datanode); + datanode, null); DataOutputStream out = new DataOutputStream(new IOUtils.NullOutputStream()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java index b9e3858f3e..84b38b37e9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java @@ -41,191 +41,230 @@ /** * Reads a block from the disk and sends it to a recipient. + * + * Data sent from the BlockeSender in the following format: + *
Data format:

+ *    +--------------------------------------------------+
+ *    | ChecksumHeader | Sequence of data PACKETS...     |
+ *    +--------------------------------------------------+ 
+ * 
+ * ChecksumHeader format:
+ *    +--------------------------------------------------+
+ *    | 1 byte CHECKSUM_TYPE | 4 byte BYTES_PER_CHECKSUM |
+ *    +--------------------------------------------------+ 
+ * 
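+ *
+ * The ChecksumHeader above is written by DataChecksum#writeHeader(); when
+ * the chunk offset is requested (chunkOffsetOK), an 8 byte offset follows
+ * it on the wire (see writeChecksumHeader() later in this patch).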
+ * An empty packet is sent to mark the end of the block and read completion. + * + * A PACKET contains a packet header, checksum and data. The amount of data + * carried is set by BUFFER_SIZE. + *
+ *    +-----------------------------------------------------+
+ *    | 4 byte packet length (excluding packet header)      |
+ *    +-----------------------------------------------------+
+ *    | 8 byte offset in the block | 8 byte sequence number |
+ *    +-----------------------------------------------------+
+ *    | 1 byte isLastPacketInBlock                          |
+ *    +-----------------------------------------------------+
+ *    | 4 byte Length of actual data                        |
+ *    +-----------------------------------------------------+
+ *    | x byte checksum data. x is defined below            |
+ *    +-----------------------------------------------------+
+ *    | actual data ......                                  |
+ *    +-----------------------------------------------------+
+ *    
+ *    Data is made of Chunks. Each chunk is of length <= BYTES_PER_CHECKSUM.
+ *    A checksum is calculated for each chunk.
+ *    
+ *    x = (length of data + BYTES_PER_CHECKSUM - 1)/BYTES_PER_CHECKSUM *
+ *        CHECKSUM_SIZE
+ *        
+ *    CHECKSUM_SIZE depends on CHECKSUM_TYPE (usually 4 bytes, for CRC32) 
+ *    
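+ *
+ *    As a worked illustration of the sizes above (this mirrors the
+ *    sendPacket() computation later in this patch):
+ *
+ *        numChunks       = (dataLen + BYTES_PER_CHECKSUM - 1) / BYTES_PER_CHECKSUM
+ *        checksumDataLen = numChunks * CHECKSUM_SIZE
+ *        packetLen       = dataLen + checksumDataLen + 4
+ *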
+ * + * The client reads data until it receives a packet with + * "LastPacketInBlock" set to true or with a zero length. If there is + * no checksum error, it replies to DataNode with OP_STATUS_CHECKSUM_OK: + *
+ *    +------------------------------+
+ *    | 2 byte OP_STATUS_CHECKSUM_OK |
+ *    +------------------------------+
+ *  
*/ class BlockSender implements java.io.Closeable { - public static final Log LOG = DataNode.LOG; + static final Log LOG = DataNode.LOG; static final Log ClientTraceLog = DataNode.ClientTraceLog; - - private ExtendedBlock block; // the block to read from - - /** the replica to read from */ - private final Replica replica; - /** The visible length of a replica. */ - private final long replicaVisibleLength; - - private static final boolean is32Bit = System.getProperty("sun.arch.data.model").equals("32"); - - private InputStream blockIn; // data stream - private long blockInPosition = -1; // updated while using transferTo(). - private DataInputStream checksumIn; // checksum datastream - private DataChecksum checksum; // checksum stream - private long offset; // starting position to read - private long endOffset; // ending position - private int bytesPerChecksum; // chunk size - private int checksumSize; // checksum size - private boolean corruptChecksumOk; // if need to verify checksum - private boolean chunkOffsetOK; // if need to send chunk offset - private long seqno; // sequence number of packet - - private boolean transferToAllowed = true; - // set once entire requested byte range has been sent to the client - private boolean sentEntireByteRange; - private boolean verifyChecksum; //if true, check is verified while reading - private DataTransferThrottler throttler; - private final String clientTraceFmt; // format of client trace log message - + private static final boolean is32Bit = + System.getProperty("sun.arch.data.model").equals("32"); /** * Minimum buffer used while sending data to clients. Used only if * transferTo() is enabled. 64KB is not that large. It could be larger, but * not sure if there will be much more improvement. */ private static final int MIN_BUFFER_WITH_TRANSFERTO = 64*1024; - private volatile ChunkChecksum lastChunkChecksum = null; - + private static final int TRANSFERTO_BUFFER_SIZE = Math.max( + HdfsConstants.IO_FILE_BUFFER_SIZE, MIN_BUFFER_WITH_TRANSFERTO); - BlockSender(ExtendedBlock block, long startOffset, long length, - boolean corruptChecksumOk, boolean chunkOffsetOK, - boolean verifyChecksum, DataNode datanode) throws IOException { - this(block, startOffset, length, corruptChecksumOk, chunkOffsetOK, - verifyChecksum, datanode, null); - } - + /** the block to read from */ + private final ExtendedBlock block; + /** the replica to read from */ + private final Replica replica; + /** The visible length of a replica. 
*/ + private final long replicaVisibleLength; + /** Stream to read block data from */ + private InputStream blockIn; + /** updated while using transferTo() */ + private long blockInPosition = -1; + /** Stream to read checksum */ + private DataInputStream checksumIn; + /** Checksum utility */ + private final DataChecksum checksum; + /** Starting position to read */ + private long offset; + /** Position of last byte to read from block file */ + private final long endOffset; + /** Number of bytes in chunk used for computing checksum */ + private final int chunkSize; + /** Number bytes of checksum computed for a chunk */ + private final int checksumSize; + /** If true, failure to read checksum is ignored */ + private final boolean corruptChecksumOk; + /** true if chunk offset is needed to be sent in Checksum header */ + private final boolean chunkOffsetOK; + /** Sequence number of packet being sent */ + private long seqno; + /** Set to true if transferTo is allowed for sending data to the client */ + private final boolean transferToAllowed; + /** Set to true once entire requested byte range has been sent to the client */ + private boolean sentEntireByteRange; + /** When true, verify checksum while reading from checksum file */ + private final boolean verifyChecksum; + /** Format used to print client trace log messages */ + private final String clientTraceFmt; + private volatile ChunkChecksum lastChunkChecksum = null; + + /** + * Constructor + * + * @param block Block that is being read + * @param startOffset starting offset to read from + * @param length length of data to read + * @param corruptChecksumOk + * @param chunkOffsetOK need to send check offset in checksum header + * @param verifyChecksum verify checksum while reading the data + * @param datanode datanode from which the block is being read + * @param clientTraceFmt format string used to print client trace logs + * @throws IOException + */ BlockSender(ExtendedBlock block, long startOffset, long length, boolean corruptChecksumOk, boolean chunkOffsetOK, boolean verifyChecksum, DataNode datanode, String clientTraceFmt) throws IOException { try { this.block = block; + this.chunkOffsetOK = chunkOffsetOK; + this.corruptChecksumOk = corruptChecksumOk; + this.verifyChecksum = verifyChecksum; + this.clientTraceFmt = clientTraceFmt; + synchronized(datanode.data) { - this.replica = datanode.data.getReplica(block.getBlockPoolId(), - block.getBlockId()); - if (replica == null) { - throw new ReplicaNotFoundException(block); - } + this.replica = getReplica(block, datanode); this.replicaVisibleLength = replica.getVisibleLength(); } - long minEndOffset = startOffset + length; - // if this is a write in progress + // if there is a write in progress ChunkChecksum chunkChecksum = null; if (replica instanceof ReplicaBeingWritten) { - for (int i = 0; i < 30 && replica.getBytesOnDisk() < minEndOffset; i++) { - try { - Thread.sleep(100); - } catch (InterruptedException ie) { - throw new IOException(ie); - } - } - - long currentBytesOnDisk = replica.getBytesOnDisk(); - - if (currentBytesOnDisk < minEndOffset) { - throw new IOException(String.format( - "need %d bytes, but only %d bytes available", - minEndOffset, - currentBytesOnDisk - )); - } - + long minEndOffset = startOffset + length; + waitForMinLength((ReplicaBeingWritten)replica, minEndOffset); ReplicaInPipeline rip = (ReplicaInPipeline) replica; chunkChecksum = rip.getLastChecksumAndDataLen(); } if (replica.getGenerationStamp() < block.getGenerationStamp()) { - throw new IOException( - 
"replica.getGenerationStamp() < block.getGenerationStamp(), block=" + throw new IOException("Replica gen stamp < block genstamp, block=" + block + ", replica=" + replica); } if (replicaVisibleLength < 0) { - throw new IOException("The replica is not readable, block=" + throw new IOException("Replica is not readable, block=" + block + ", replica=" + replica); } if (DataNode.LOG.isDebugEnabled()) { DataNode.LOG.debug("block=" + block + ", replica=" + replica); } - - this.chunkOffsetOK = chunkOffsetOK; - this.corruptChecksumOk = corruptChecksumOk; - this.verifyChecksum = verifyChecksum; // transferToFully() fails on 32 bit platforms for block sizes >= 2GB, // use normal transfer in those cases this.transferToAllowed = datanode.transferToAllowed && - (!is32Bit || length < (long) Integer.MAX_VALUE); - this.clientTraceFmt = clientTraceFmt; + (!is32Bit || length <= Integer.MAX_VALUE); - if ( !corruptChecksumOk || datanode.data.metaFileExists(block) ) { + DataChecksum csum; + if (!corruptChecksumOk || datanode.data.metaFileExists(block)) { checksumIn = new DataInputStream(new BufferedInputStream(datanode.data .getMetaDataInputStream(block), HdfsConstants.IO_FILE_BUFFER_SIZE)); // read and handle the common header here. For now just a version - BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn); - short version = header.getVersion(); - + BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn); + short version = header.getVersion(); if (version != FSDataset.METADATA_VERSION) { LOG.warn("Wrong version (" + version + ") for metadata file for " + block + " ignoring ..."); } - checksum = header.getChecksum(); + csum = header.getChecksum(); } else { LOG.warn("Could not find metadata file for " + block); // This only decides the buffer size. Use BUFFER_SIZE? - checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL, + csum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL, 16 * 1024); } - /* If bytesPerChecksum is very large, then the metadata file - * is mostly corrupted. For now just truncate bytesPerchecksum to - * blockLength. - */ - bytesPerChecksum = checksum.getBytesPerChecksum(); - if (bytesPerChecksum > 10*1024*1024 && bytesPerChecksum > replicaVisibleLength) { - checksum = DataChecksum.newDataChecksum(checksum.getChecksumType(), + /* + * If chunkSize is very large, then the metadata file is mostly + * corrupted. For now just truncate bytesPerchecksum to blockLength. + */ + int size = csum.getBytesPerChecksum(); + if (size > 10*1024*1024 && size > replicaVisibleLength) { + csum = DataChecksum.newDataChecksum(csum.getChecksumType(), Math.max((int)replicaVisibleLength, 10*1024*1024)); - bytesPerChecksum = checksum.getBytesPerChecksum(); + size = csum.getBytesPerChecksum(); } + chunkSize = size; + checksum = csum; checksumSize = checksum.getChecksumSize(); - - if (length < 0) { - length = replicaVisibleLength; - } + length = length < 0 ? replicaVisibleLength : length; // end is either last byte on disk or the length for which we have a // checksum - if (chunkChecksum != null) { - endOffset = chunkChecksum.getDataLength(); - } else { - endOffset = replica.getBytesOnDisk(); - } - - if (startOffset < 0 || startOffset > endOffset - || (length + startOffset) > endOffset) { + long end = chunkChecksum != null ? 
chunkChecksum.getDataLength() + : replica.getBytesOnDisk(); + if (startOffset < 0 || startOffset > end + || (length + startOffset) > end) { String msg = " Offset " + startOffset + " and length " + length - + " don't match block " + block + " ( blockLen " + endOffset + " )"; + + " don't match block " + block + " ( blockLen " + end + " )"; LOG.warn(datanode.getDNRegistrationForBP(block.getBlockPoolId()) + ":sendBlock() : " + msg); throw new IOException(msg); } - offset = (startOffset - (startOffset % bytesPerChecksum)); + // Ensure read offset is position at the beginning of chunk + offset = startOffset - (startOffset % chunkSize); if (length >= 0) { - // Make sure endOffset points to end of a checksumed chunk. + // Ensure endOffset points to end of chunk. long tmpLen = startOffset + length; - if (tmpLen % bytesPerChecksum != 0) { - tmpLen += (bytesPerChecksum - tmpLen % bytesPerChecksum); + if (tmpLen % chunkSize != 0) { + tmpLen += (chunkSize - tmpLen % chunkSize); } - if (tmpLen < endOffset) { + if (tmpLen < end) { // will use on-disk checksum here since the end is a stable chunk - endOffset = tmpLen; + end = tmpLen; } else if (chunkChecksum != null) { - //in last chunk which is changing. flag that we need to use in-memory - // checksum + // last chunk is changing. flag that we need to use in-memory checksum this.lastChunkChecksum = chunkChecksum; } } + endOffset = end; // seek to the right offsets if (offset > 0) { - long checksumSkip = (offset / bytesPerChecksum) * checksumSize; + long checksumSkip = (offset / chunkSize) * checksumSize; // note blockInStream is seeked when created below if (checksumSkip > 0) { // Should we use seek() for checksum file as well? @@ -237,7 +276,6 @@ class BlockSender implements java.io.Closeable { if (DataNode.LOG.isDebugEnabled()) { DataNode.LOG.debug("replica=" + replica); } - blockIn = datanode.data.getBlockInputStream(block, offset); // seek to offset } catch (IOException ioe) { IOUtils.closeStream(this); @@ -251,19 +289,17 @@ class BlockSender implements java.io.Closeable { */ public void close() throws IOException { IOException ioe = null; - // close checksum file if(checksumIn!=null) { try { - checksumIn.close(); + checksumIn.close(); // close checksum file } catch (IOException e) { ioe = e; } checksumIn = null; - } - // close data file + } if(blockIn!=null) { try { - blockIn.close(); + blockIn.close(); // close data file } catch (IOException e) { ioe = e; } @@ -274,7 +310,41 @@ public void close() throws IOException { throw ioe; } } - + + private static Replica getReplica(ExtendedBlock block, DataNode datanode) + throws ReplicaNotFoundException { + Replica replica = datanode.data.getReplica(block.getBlockPoolId(), + block.getBlockId()); + if (replica == null) { + throw new ReplicaNotFoundException(block); + } + return replica; + } + + /** + * Wait for rbw replica to reach the length + * @param rbw replica that is being written to + * @param len minimum length to reach + * @throws IOException on failing to reach the len in given wait time + */ + private static void waitForMinLength(ReplicaBeingWritten rbw, long len) + throws IOException { + // Wait for 3 seconds for rbw replica to reach the minimum length + for (int i = 0; i < 30 && rbw.getBytesOnDisk() < len; i++) { + try { + Thread.sleep(100); + } catch (InterruptedException ie) { + throw new IOException(ie); + } + } + long bytesOnDisk = rbw.getBytesOnDisk(); + if (bytesOnDisk < len) { + throw new IOException( + String.format("Need %d bytes, but only %d bytes available", len, + bytesOnDisk)); + } 
+ } + /** * Converts an IOExcpetion (not subclasses) to SocketException. * This is typically done to indicate to upper layers that the error @@ -296,54 +366,43 @@ private static IOException ioeToSocketException(IOException ioe) { } /** - * Sends upto maxChunks chunks of data. - * - * When blockInPosition is >= 0, assumes 'out' is a - * {@link SocketOutputStream} and tries - * {@link SocketOutputStream#transferToFully(FileChannel, long, int)} to - * send data (and updates blockInPosition). + * @param datalen Length of data + * @return number of chunks for data of given size */ - private int sendChunks(ByteBuffer pkt, int maxChunks, OutputStream out) - throws IOException { - // Sends multiple chunks in one packet with a single write(). + private int numberOfChunks(long datalen) { + return (int) ((datalen + chunkSize - 1)/chunkSize); + } + + /** + * Sends a packet with up to maxChunks chunks of data. + * + * @param pkt buffer used for writing packet data + * @param maxChunks maximum number of chunks to send + * @param out stream to send data to + * @param transferTo use transferTo to send data + * @param throttler used for throttling data transfer bandwidth + */ + private int sendPacket(ByteBuffer pkt, int maxChunks, OutputStream out, + boolean transferTo, DataTransferThrottler throttler) throws IOException { + int dataLen = (int) Math.min(endOffset - offset, + (chunkSize * (long) maxChunks)); + + int numChunks = numberOfChunks(dataLen); // Number of chunks be sent in the packet + int checksumDataLen = numChunks * checksumSize; + int packetLen = dataLen + checksumDataLen + 4; + boolean lastDataPacket = offset + dataLen == endOffset && dataLen > 0; - int len = (int) Math.min(endOffset - offset, - (((long) bytesPerChecksum) * ((long) maxChunks))); - int numChunks = (len + bytesPerChecksum - 1)/bytesPerChecksum; - int packetLen = len + numChunks*checksumSize + 4; - boolean lastDataPacket = offset + len == endOffset && len > 0; - pkt.clear(); - - - PacketHeader header = new PacketHeader( - packetLen, offset, seqno, (len == 0), len); - header.putInBuffer(pkt); + writePacketHeader(pkt, dataLen, packetLen); int checksumOff = pkt.position(); - int checksumLen = numChunks * checksumSize; byte[] buf = pkt.array(); if (checksumSize > 0 && checksumIn != null) { - try { - checksumIn.readFully(buf, checksumOff, checksumLen); - } catch (IOException e) { - LOG.warn(" Could not read or failed to veirfy checksum for data" - + " at offset " + offset + " for block " + block, e); - IOUtils.closeStream(checksumIn); - checksumIn = null; - if (corruptChecksumOk) { - if (checksumOff < checksumLen) { - // Just fill the array with zeros. 
- Arrays.fill(buf, checksumOff, checksumLen, (byte) 0); - } - } else { - throw e; - } - } + readChecksum(buf, checksumOff, checksumDataLen); // write in progress that we need to use to get last checksum if (lastDataPacket && lastChunkChecksum != null) { - int start = checksumOff + checksumLen - checksumSize; + int start = checksumOff + checksumDataLen - checksumSize; byte[] updatedChecksum = lastChunkChecksum.getChecksum(); if (updatedChecksum != null) { @@ -352,52 +411,28 @@ private int sendChunks(ByteBuffer pkt, int maxChunks, OutputStream out) } } - int dataOff = checksumOff + checksumLen; - - if (blockInPosition < 0) { - //normal transfer - IOUtils.readFully(blockIn, buf, dataOff, len); + int dataOff = checksumOff + checksumDataLen; + if (!transferTo) { // normal transfer + IOUtils.readFully(blockIn, buf, dataOff, dataLen); if (verifyChecksum) { - int dOff = dataOff; - int cOff = checksumOff; - int dLeft = len; - - for (int i=0; i= 0) { - //use transferTo(). Checks on out and blockIn are already done. - + if (transferTo) { SocketOutputStream sockOut = (SocketOutputStream)out; - //first write the packet - sockOut.write(buf, 0, dataOff); + sockOut.write(buf, 0, dataOff); // First write checksum + // no need to flush. since we know out is not a buffered stream. - sockOut.transferToFully(((FileInputStream)blockIn).getChannel(), - blockInPosition, len); - - blockInPosition += len; - } else { + blockInPosition, dataLen); + blockInPosition += dataLen; + } else { // normal transfer - out.write(buf, 0, dataOff + len); + out.write(buf, 0, dataOff + dataLen); } - } catch (IOException e) { /* Exception while writing to the client. Connection closure from * the other end is mostly the case and we do not care much about @@ -419,9 +454,72 @@ private int sendChunks(ByteBuffer pkt, int maxChunks, OutputStream out) throttler.throttle(packetLen); } - return len; + return dataLen; } + + /** + * Read checksum into given buffer + * @param buf buffer to read the checksum into + * @param checksumOffset offset at which to write the checksum into buf + * @param checksumLen length of checksum to write + * @throws IOException on error + */ + private void readChecksum(byte[] buf, final int checksumOffset, + final int checksumLen) throws IOException { + if (checksumSize <= 0 && checksumIn == null) { + return; + } + try { + checksumIn.readFully(buf, checksumOffset, checksumLen); + } catch (IOException e) { + LOG.warn(" Could not read or failed to veirfy checksum for data" + + " at offset " + offset + " for block " + block, e); + IOUtils.closeStream(checksumIn); + checksumIn = null; + if (corruptChecksumOk) { + if (checksumOffset < checksumLen) { + // Just fill the array with zeros. + Arrays.fill(buf, checksumOffset, checksumLen, (byte) 0); + } + } else { + throw e; + } + } + } + + /** + * Compute checksum for chunks and verify the checksum that is read from + * the metadata file is correct. 
+ * + * @param buf buffer that has checksum and data + * @param dataOffset position where data is written in the buf + * @param datalen length of data + * @param numChunks number of chunks corresponding to data + * @param checksumOffset offset where checksum is written in the buf + * @throws ChecksumException on failed checksum verification + */ + public void verifyChecksum(final byte[] buf, final int dataOffset, + final int datalen, final int numChunks, final int checksumOffset) + throws ChecksumException { + int dOff = dataOffset; + int cOff = checksumOffset; + int dLeft = datalen; + for (int i = 0; i < numChunks; i++) { + checksum.reset(); + int dLen = Math.min(dLeft, chunkSize); + checksum.update(buf, dOff, dLen); + if (!checksum.compare(buf, cOff)) { + long failedPos = offset + datalen - dLeft; + throw new ChecksumException("Checksum failed at " + failedPos, + failedPos); + } + dLeft -= dLen; + dOff += dLen; + cOff += checksumSize; + } + } + /** * sendBlock() is used to read block and its metadata and stream the data to * either a client or to another datanode. @@ -433,70 +531,54 @@ private int sendChunks(ByteBuffer pkt, int maxChunks, OutputStream out) * {@link SocketOutputStream#transferToFully(FileChannel, * long, int)}. * @param throttler for sending data. - * @return total bytes reads, including crc. + * @return total bytes read, including checksum data. */ long sendBlock(DataOutputStream out, OutputStream baseStream, DataTransferThrottler throttler) throws IOException { - if( out == null ) { + if (out == null) { throw new IOException( "out stream is null" ); } - this.throttler = throttler; - - long initialOffset = offset; + final long initialOffset = offset; long totalRead = 0; OutputStream streamForSendChunks = out; final long startTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0; try { - try { - checksum.writeHeader(out); - if ( chunkOffsetOK ) { - out.writeLong( offset ); - } - out.flush(); - } catch (IOException e) { //socket error - throw ioeToSocketException(e); - } + writeChecksumHeader(out); int maxChunksPerPacket; int pktSize = PacketHeader.PKT_HEADER_LEN; - - if (transferToAllowed && !verifyChecksum && - baseStream instanceof SocketOutputStream && - blockIn instanceof FileInputStream) { - + boolean transferTo = transferToAllowed && !verifyChecksum + && baseStream instanceof SocketOutputStream + && blockIn instanceof FileInputStream; + if (transferTo) { FileChannel fileChannel = ((FileInputStream)blockIn).getChannel(); - - // blockInPosition also indicates sendChunks() uses transferTo. blockInPosition = fileChannel.position(); streamForSendChunks = baseStream; + maxChunksPerPacket = numberOfChunks(TRANSFERTO_BUFFER_SIZE); - // assure a mininum buffer size. - maxChunksPerPacket = (Math.max(HdfsConstants.IO_FILE_BUFFER_SIZE, - MIN_BUFFER_WITH_TRANSFERTO) - + bytesPerChecksum - 1)/bytesPerChecksum; - - // allocate smaller buffer while using transferTo(). 
+ // Smaller packet size to only hold checksum when doing transferTo pktSize += checksumSize * maxChunksPerPacket; } else { - maxChunksPerPacket = Math.max(1, (HdfsConstants.IO_FILE_BUFFER_SIZE - + bytesPerChecksum - 1) / bytesPerChecksum); - pktSize += (bytesPerChecksum + checksumSize) * maxChunksPerPacket; + maxChunksPerPacket = Math.max(1, + numberOfChunks(HdfsConstants.IO_FILE_BUFFER_SIZE)); + // Packet size includes both checksum and data + pktSize += (chunkSize + checksumSize) * maxChunksPerPacket; } ByteBuffer pktBuf = ByteBuffer.allocate(pktSize); while (endOffset > offset) { - long len = sendChunks(pktBuf, maxChunksPerPacket, - streamForSendChunks); + long len = sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks, + transferTo, throttler); offset += len; - totalRead += len + ((len + bytesPerChecksum - 1)/bytesPerChecksum* - checksumSize); + totalRead += len + (numberOfChunks(len) * checksumSize); seqno++; } try { // send an empty packet to mark the end of the block - sendChunks(pktBuf, maxChunksPerPacket, streamForSendChunks); + sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks, transferTo, + throttler); out.flush(); } catch (IOException e) { //socket error throw ioeToSocketException(e); @@ -506,14 +588,39 @@ long sendBlock(DataOutputStream out, OutputStream baseStream, } finally { if (clientTraceFmt != null) { final long endTime = System.nanoTime(); - ClientTraceLog.info(String.format(clientTraceFmt, totalRead, initialOffset, endTime - startTime)); + ClientTraceLog.info(String.format(clientTraceFmt, totalRead, + initialOffset, endTime - startTime)); } close(); } - return totalRead; } + /** + * Write checksum header to the output stream + */ + private void writeChecksumHeader(DataOutputStream out) throws IOException { + try { + checksum.writeHeader(out); + if (chunkOffsetOK) { + out.writeLong(offset); + } + out.flush(); + } catch (IOException e) { //socket error + throw ioeToSocketException(e); + } + } + + /** + * Write packet header into {@code pkt} + */ + private void writePacketHeader(ByteBuffer pkt, int dataLen, int packetLen) { + pkt.clear(); + PacketHeader header = new PacketHeader(packetLen, offset, seqno, + (dataLen == 0), dataLen); + header.putInBuffer(pkt); + } + boolean didSendEntireByteRange() { return sentEntireByteRange; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index b6ec0c05b4..d4f5bc19f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -2058,7 +2058,7 @@ public void run() { out = new DataOutputStream(new BufferedOutputStream(baseStream, HdfsConstants.SMALL_BUFFER_SIZE)); blockSender = new BlockSender(b, 0, b.getNumBytes(), - false, false, false, DataNode.this); + false, false, false, DataNode.this, null); DatanodeInfo srcNode = new DatanodeInfo(bpReg); // diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index 8d7d95f8aa..fdcdc18a34 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -597,7 +597,7 @@ public void copyBlock(final ExtendedBlock block, try { // check if the block exists or not blockSender = new BlockSender(block, 0, -1, false, false, false, - datanode); + datanode, null); // set up response stream OutputStream baseStream = NetUtils.getOutputStream( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java index 0305024e4f..dd53da3279 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java @@ -46,10 +46,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSClient.DFSDataInputStream; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.web.JsonUtil; import org.apache.hadoop.hdfs.web.ParamFilter; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; @@ -219,13 +221,13 @@ public Response run() throws IOException { final String fullpath = path.getAbsolutePath(); final DataNode datanode = (DataNode)context.getAttribute("datanode"); + final Configuration conf = new Configuration(datanode.getConf()); + final InetSocketAddress nnRpcAddr = NameNode.getAddress(conf); + final DFSClient dfsclient = new DFSClient(nnRpcAddr, conf); switch(op.getValue()) { case OPEN: { - final Configuration conf = new Configuration(datanode.getConf()); - final InetSocketAddress nnRpcAddr = NameNode.getAddress(conf); - final DFSClient dfsclient = new DFSClient(nnRpcAddr, conf); final int b = bufferSize.getValue(conf); final DFSDataInputStream in = new DFSClient.DFSDataInputStream( dfsclient.open(fullpath, b, true)); @@ -244,6 +246,12 @@ public void write(final OutputStream out) throws IOException { }; return Response.ok(streaming).type(MediaType.APPLICATION_OCTET_STREAM).build(); } + case GETFILECHECKSUM: + { + final MD5MD5CRC32FileChecksum checksum = dfsclient.getFileChecksum(fullpath); + final String js = JsonUtil.toJsonString(checksum); + return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); + } default: throw new UnsupportedOperationException(op + " is not supported"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java index 5dd82393fd..836c22d014 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java @@ -54,7 +54,6 @@ class EditLogBackupOutputStream extends EditLogOutputStream { this.nnRegistration = nnReg; InetSocketAddress bnAddress = NetUtils.createSocketAddr(bnRegistration.getAddress()); - 
Storage.LOG.info("EditLogBackupOutputStream connects to: " + bnAddress); try { this.backupNode = RPC.getProxy(JournalProtocol.class, @@ -67,16 +66,6 @@ class EditLogBackupOutputStream extends EditLogOutputStream { this.out = new DataOutputBuffer(DEFAULT_BUFFER_SIZE); } - @Override // JournalStream - public String getName() { - return bnRegistration.getAddress(); - } - - @Override // JournalStream - public JournalType getType() { - return JournalType.BACKUP; - } - @Override // EditLogOutputStream void write(FSEditLogOp op) throws IOException { doubleBuf.writeOp(op); @@ -141,16 +130,6 @@ protected void flushAndSync() throws IOException { } } - /** - * There is no persistent storage. Therefore length is 0.

- * Length is used to check when it is large enough to start a checkpoint. - * This criteria should not be used for backup streams. - */ - @Override // EditLogOutputStream - long length() throws IOException { - return 0; - } - /** * Get backup node registration. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java index be75f637a9..4780d04b00 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java @@ -37,9 +37,7 @@ * stores edits in a local file. */ class EditLogFileOutputStream extends EditLogOutputStream { - private static Log LOG = LogFactory.getLog(EditLogFileOutputStream.class);; - - private static int EDITS_FILE_HEADER_SIZE_BYTES = Integer.SIZE / Byte.SIZE; + private static Log LOG = LogFactory.getLog(EditLogFileOutputStream.class); private File file; private FileOutputStream fp; // file stream for storing edit logs @@ -73,16 +71,6 @@ class EditLogFileOutputStream extends EditLogOutputStream { fc.position(fc.size()); } - @Override // JournalStream - public String getName() { - return file.getPath(); - } - - @Override // JournalStream - public JournalType getType() { - return JournalType.FILE; - } - /** {@inheritDoc} */ @Override void write(FSEditLogOp op) throws IOException { @@ -176,7 +164,10 @@ protected void flushAndSync() throws IOException { if (fp == null) { throw new IOException("Trying to use aborted output stream"); } - + if (doubleBuf.isFlushed()) { + LOG.info("Nothing to flush"); + return; + } preallocate(); // preallocate file if necessary doubleBuf.flushTo(fp); fc.force(false); // metadata updates not needed because of preallocation @@ -190,16 +181,6 @@ protected void flushAndSync() throws IOException { public boolean shouldForceSync() { return doubleBuf.shouldForceSync(); } - - /** - * Return the size of the current edit log including buffered data. - */ - @Override - long length() throws IOException { - // file size - header size + size of both buffers - return fc.size() - EDITS_FILE_HEADER_SIZE_BYTES + - doubleBuf.countBufferedBytes(); - } // allocate a big chunk of data private void preallocate() throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java index 8577db8e45..8681837de5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java @@ -18,23 +18,20 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; -import java.util.zip.Checksum; import static org.apache.hadoop.hdfs.server.common.Util.now; -import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.io.Writable; /** * A generic abstract class to support journaling of edits logs into * a persistent storage. 
*/ -abstract class EditLogOutputStream implements JournalStream { +abstract class EditLogOutputStream { // these are statistics counters private long numSync; // number of sync(s) to disk private long totalTimeSync; // total time to sync - EditLogOutputStream() throws IOException { + EditLogOutputStream() { numSync = totalTimeSync = 0; } @@ -105,12 +102,6 @@ public void flush() throws IOException { totalTimeSync += (end - start); } - /** - * Return the size of the current edits log. - * Length is used to check when it is large enough to start a checkpoint. - */ - abstract long length() throws IOException; - /** * Implement the policy when to automatically sync the buffered edits log * The buffered edits can be flushed when the buffer becomes full or @@ -132,12 +123,7 @@ long getTotalSyncTime() { /** * Return number of calls to {@link #flushAndSync()} */ - long getNumSync() { + protected long getNumSync() { return numSync; } - - @Override // Object - public String toString() { - return getName(); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index e355a9d838..4a41a2cbd6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -17,12 +17,12 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import static org.apache.hadoop.hdfs.server.common.Util.now; + import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; import java.util.List; -import java.util.SortedSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -34,25 +34,17 @@ import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; -import static org.apache.hadoop.hdfs.server.common.Util.now; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.*; +import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; -import org.apache.hadoop.hdfs.server.namenode.JournalManager.CorruptionException; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; -import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; -import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.security.token.delegation.DelegationKey; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableListMultimap; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimaps; -import com.google.common.collect.Sets; - -import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.*; /** * FSEditLog maintains a log of the namespace modifications. @@ -62,9 +54,6 @@ @InterfaceStability.Evolving public class FSEditLog { - static final String NO_JOURNAL_STREAMS_WARNING = "!!! WARNING !!!" 
+ - " File system changes are not persistent. No journal streams."; - static final Log LOG = LogFactory.getLog(FSEditLog.class); /** @@ -82,10 +71,11 @@ private enum State { CLOSED; } private State state = State.UNINITIALIZED; + + //initialize + final private JournalSet journalSet; + private EditLogOutputStream editLogStream = null; - - private List journals = Lists.newArrayList(); - // a monotonically increasing counter that represents transactionIds. private long txid = 0; @@ -137,15 +127,15 @@ protected synchronized TransactionId initialValue() { this.storage = storage; metrics = NameNode.getNameNodeMetrics(); lastPrintTime = now(); - + + this.journalSet = new JournalSet(); for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.EDITS)) { - journals.add(new JournalAndStream(new FileJournalManager(sd))); + journalSet.add(new FileJournalManager(sd)); } - if (journals.isEmpty()) { + if (journalSet.isEmpty()) { LOG.error("No edits directories configured!"); - } - + } state = State.BETWEEN_LOG_SEGMENTS; } @@ -172,9 +162,8 @@ synchronized void close() { LOG.warn("Closing log when already closed", new Exception()); return; } - if (state == State.IN_SEGMENT) { - assert !journals.isEmpty(); + assert editLogStream != null; waitForSyncToFinish(); endCurrentLogSegment(true); } @@ -193,20 +182,14 @@ void logEdit(final FSEditLogOp op) { // wait if an automatic sync is scheduled waitIfAutoSyncScheduled(); - if (journals.isEmpty()) { - throw new java.lang.IllegalStateException(NO_JOURNAL_STREAMS_WARNING); - } - long start = beginTransaction(); op.setTransactionId(txid); - mapJournalsAndReportErrors(new JournalClosure() { - @Override - public void apply(JournalAndStream jas) throws IOException { - if (!jas.isActive()) return; - jas.stream.write(op); - } - }, "logging edit"); + try { + editLogStream.write(op); + } catch (IOException ex) { + // All journals failed, it is handled in logSync. + } endTransaction(start); @@ -251,14 +234,7 @@ synchronized void doneWithAutoSyncScheduling() { * @return true if any of the edit stream says that it should sync */ private boolean shouldForceSync() { - for (JournalAndStream jas : journals) { - if (!jas.isActive()) continue; - - if (jas.getCurrentStream().shouldForceSync()) { - return true; - } - } - return false; + return editLogStream.shouldForceSync(); } private long beginTransaction() { @@ -322,7 +298,7 @@ synchronized void setNextTxId(long nextTxId) { * NOTE: this should be done while holding the FSNamesystem lock, or * else more operations can start writing while this is in progress. */ - void logSyncAll() throws IOException { + void logSyncAll() { // Record the most recent transaction ID as our own id synchronized (this) { TransactionId id = myTransactionId.get(); @@ -366,74 +342,73 @@ public void logSync() { // Fetch the transactionId of this thread. 
long mytxid = myTransactionId.get().txid; - List candidateJournals = - Lists.newArrayListWithCapacity(journals.size()); - List badJournals = Lists.newArrayList(); - boolean sync = false; try { + EditLogOutputStream logStream = null; synchronized (this) { try { - printStatistics(false); - - // if somebody is already syncing, then wait - while (mytxid > synctxid && isSyncRunning) { - try { - wait(1000); - } catch (InterruptedException ie) { + printStatistics(false); + + // if somebody is already syncing, then wait + while (mytxid > synctxid && isSyncRunning) { + try { + wait(1000); + } catch (InterruptedException ie) { + } } - } - // - // If this transaction was already flushed, then nothing to do - // - if (mytxid <= synctxid) { - numTransactionsBatchedInSync++; - if (metrics != null) // Metrics is non-null only when used inside name node - metrics.incrTransactionsBatchedInSync(); - return; - } + // + // If this transaction was already flushed, then nothing to do + // + if (mytxid <= synctxid) { + numTransactionsBatchedInSync++; + if (metrics != null) { + // Metrics is non-null only when used inside name node + metrics.incrTransactionsBatchedInSync(); + } + return; + } - // now, this thread will do the sync - syncStart = txid; - isSyncRunning = true; - sync = true; + // now, this thread will do the sync + syncStart = txid; + isSyncRunning = true; + sync = true; - // swap buffers - assert !journals.isEmpty() : "no editlog streams"; - - for (JournalAndStream jas : journals) { - if (!jas.isActive()) continue; + // swap buffers try { - jas.getCurrentStream().setReadyToFlush(); - candidateJournals.add(jas); - } catch (IOException ie) { - LOG.error("Unable to get ready to flush.", ie); - badJournals.add(jas); + if (journalSet.isEmpty()) { + throw new IOException("No journals available to flush"); + } + editLogStream.setReadyToFlush(); + } catch (IOException e) { + LOG.fatal("Could not sync any journal to persistent storage. " + + "Unsynced transactions: " + (txid - synctxid), + new Exception()); + runtime.exit(1); } - } } finally { // Prevent RuntimeException from blocking other log edit write doneWithAutoSyncScheduling(); } + //editLogStream may become null, + //so store a local variable for flush. + logStream = editLogStream; } - + // do the sync long start = now(); - for (JournalAndStream jas : candidateJournals) { - if (!jas.isActive()) continue; - try { - jas.getCurrentStream().flush(); - } catch (IOException ie) { - LOG.error("Unable to sync edit log.", ie); - // - // remember the streams that encountered an error. - // - badJournals.add(jas); + try { + if (logStream != null) { + logStream.flush(); + } + } catch (IOException ex) { + synchronized (this) { + LOG.fatal("Could not sync any journal to persistent storage. " + + "Unsynced transactions: " + (txid - synctxid), new Exception()); + runtime.exit(1); } } long elapsed = now() - start; - disableAndReportErrorOnJournals(badJournals); if (metrics != null) { // Metrics non-null only when used inside name node metrics.addSync(elapsed); @@ -443,13 +418,6 @@ public void logSync() { // Prevent RuntimeException from blocking other log edit sync synchronized (this) { if (sync) { - if (badJournals.size() >= journals.size()) { - LOG.fatal("Could not sync any journal to persistent storage. 
" + - "Unsynced transactions: " + (txid - synctxid), - new Exception()); - runtime.exit(1); - } - synctxid = syncStart; isSyncRunning = false; } @@ -466,9 +434,6 @@ private void printStatistics(boolean force) { if (lastPrintTime + 60000 > now && !force) { return; } - if (journals.isEmpty()) { - return; - } lastPrintTime = now; StringBuilder buf = new StringBuilder(); buf.append("Number of transactions: "); @@ -478,20 +443,9 @@ private void printStatistics(boolean force) { buf.append("Number of transactions batched in Syncs: "); buf.append(numTransactionsBatchedInSync); buf.append(" Number of syncs: "); - for (JournalAndStream jas : journals) { - if (!jas.isActive()) continue; - buf.append(jas.getCurrentStream().getNumSync()); - break; - } - + buf.append(editLogStream.getNumSync()); buf.append(" SyncTimes(ms): "); - - for (JournalAndStream jas : journals) { - if (!jas.isActive()) continue; - EditLogOutputStream eStream = jas.getCurrentStream(); - buf.append(eStream.getTotalSyncTime()); - buf.append(" "); - } + buf.append(journalSet.getSyncTimes()); LOG.info(buf); } @@ -664,7 +618,6 @@ void logSymlink(String path, String value, long mtime, * log delegation token to edit log * @param id DelegationTokenIdentifier * @param expiryTime of the token - * @return */ void logGetDelegationToken(DelegationTokenIdentifier id, long expiryTime) { @@ -702,25 +655,12 @@ void logReassignLease(String leaseHolder, String src, String newHolder) { logEdit(op); } - /** - * @return the number of active (non-failed) journals - */ - private int countActiveJournals() { - int count = 0; - for (JournalAndStream jas : journals) { - if (jas.isActive()) { - count++; - } - } - return count; - } - /** * Used only by unit tests. */ @VisibleForTesting List getJournals() { - return journals; + return journalSet.getAllJournalStreams(); } /** @@ -734,62 +674,9 @@ synchronized void setRuntimeForTesting(Runtime runtime) { /** * Return a manifest of what finalized edit logs are available */ - public synchronized RemoteEditLogManifest getEditLogManifest( - long fromTxId) throws IOException { - // Collect RemoteEditLogs available from each FileJournalManager - List allLogs = Lists.newArrayList(); - for (JournalAndStream j : journals) { - if (j.getManager() instanceof FileJournalManager) { - FileJournalManager fjm = (FileJournalManager)j.getManager(); - try { - allLogs.addAll(fjm.getRemoteEditLogs(fromTxId)); - } catch (Throwable t) { - LOG.warn("Cannot list edit logs in " + fjm, t); - } - } - } - - // Group logs by their starting txid - ImmutableListMultimap logsByStartTxId = - Multimaps.index(allLogs, RemoteEditLog.GET_START_TXID); - long curStartTxId = fromTxId; - - List logs = Lists.newArrayList(); - while (true) { - ImmutableList logGroup = logsByStartTxId.get(curStartTxId); - if (logGroup.isEmpty()) { - // we have a gap in logs - for example because we recovered some old - // storage directory with ancient logs. Clear out any logs we've - // accumulated so far, and then skip to the next segment of logs - // after the gap. 
- SortedSet startTxIds = Sets.newTreeSet(logsByStartTxId.keySet()); - startTxIds = startTxIds.tailSet(curStartTxId); - if (startTxIds.isEmpty()) { - break; - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Found gap in logs at " + curStartTxId + ": " + - "not returning previous logs in manifest."); - } - logs.clear(); - curStartTxId = startTxIds.first(); - continue; - } - } - - // Find the one that extends the farthest forward - RemoteEditLog bestLog = Collections.max(logGroup); - logs.add(bestLog); - // And then start looking from after that point - curStartTxId = bestLog.getEndTxId() + 1; - } - RemoteEditLogManifest ret = new RemoteEditLogManifest(logs); - - if (LOG.isDebugEnabled()) { - LOG.debug("Generated manifest for logs since " + fromTxId + ":" - + ret); - } - return ret; + public synchronized RemoteEditLogManifest getEditLogManifest(long fromTxId) + throws IOException { + return journalSet.getEditLogManifest(fromTxId); } /** @@ -832,14 +719,9 @@ synchronized void startLogSegment(final long segmentTxId, // See HDFS-2174. storage.attemptRestoreRemovedStorage(); - mapJournalsAndReportErrors(new JournalClosure() { - @Override - public void apply(JournalAndStream jas) throws IOException { - jas.startLogSegment(segmentTxId); - } - }, "starting log segment " + segmentTxId); - - if (countActiveJournals() == 0) { + try { + editLogStream = journalSet.startLogSegment(segmentTxId); + } catch (IOException ex) { throw new IOException("Unable to start log segment " + segmentTxId + ": no journals successfully started."); } @@ -873,14 +755,12 @@ synchronized void endCurrentLogSegment(boolean writeEndTxn) { final long lastTxId = getLastWrittenTxId(); - mapJournalsAndReportErrors(new JournalClosure() { - @Override - public void apply(JournalAndStream jas) throws IOException { - if (jas.isActive()) { - jas.close(lastTxId); - } - } - }, "ending log segment"); + try { + journalSet.finalizeLogSegment(curSegmentTxId, lastTxId); + editLogStream = null; + } catch (IOException e) { + //All journals have failed, it will be handled in logSync. + } state = State.BETWEEN_LOG_SEGMENTS; } @@ -889,14 +769,15 @@ public void apply(JournalAndStream jas) throws IOException { * Abort all current logs. Called from the backup node. */ synchronized void abortCurrentLogSegment() { - mapJournalsAndReportErrors(new JournalClosure() { - - @Override - public void apply(JournalAndStream jas) throws IOException { - jas.abort(); + try { + //Check for null, as abort can be called any time. + if (editLogStream != null) { + editLogStream.abort(); + editLogStream = null; } - }, "aborting all streams"); - state = State.BETWEEN_LOG_SEGMENTS; + } catch (IOException e) { + LOG.warn("All journals failed to abort", e); + } } /** @@ -912,13 +793,12 @@ public void purgeLogsOlderThan(final long minTxIdToKeep) { "cannot purge logs older than txid " + minTxIdToKeep + " when current segment starts at " + curSegmentTxId; } - - mapJournalsAndReportErrors(new JournalClosure() { - @Override - public void apply(JournalAndStream jas) throws IOException { - jas.manager.purgeLogsOlderThan(minTxIdToKeep); - } - }, "purging logs older than " + minTxIdToKeep); + + try { + journalSet.purgeLogsOlderThan(minTxIdToKeep); + } catch (IOException ex) { + //All journals have failed, it will be handled in logSync. + } } @@ -946,9 +826,7 @@ synchronized long getSyncTxId() { // sets the initial capacity of the flush buffer. 
public void setOutputBufferCapacity(int size) { - for (JournalAndStream jas : journals) { - jas.manager.setOutputBufferCapacity(size); - } + journalSet.setOutputBufferCapacity(size); } /** @@ -969,7 +847,7 @@ synchronized void registerBackupNode( if(bnReg.isRole(NamenodeRole.CHECKPOINT)) return; // checkpoint node does not stream edits - JournalAndStream jas = findBackupJournalAndStream(bnReg); + JournalManager jas = findBackupJournal(bnReg); if (jas != null) { // already registered LOG.info("Backup node " + bnReg + " re-registers"); @@ -978,35 +856,29 @@ synchronized void registerBackupNode( LOG.info("Registering new backup node: " + bnReg); BackupJournalManager bjm = new BackupJournalManager(bnReg, nnReg); - journals.add(new JournalAndStream(bjm)); + journalSet.add(bjm); } - synchronized void releaseBackupStream(NamenodeRegistration registration) { - for (Iterator iter = journals.iterator(); - iter.hasNext();) { - JournalAndStream jas = iter.next(); - if (jas.manager instanceof BackupJournalManager && - ((BackupJournalManager)jas.manager).matchesRegistration( - registration)) { - jas.abort(); - LOG.info("Removing backup journal " + jas); - iter.remove(); - } + synchronized void releaseBackupStream(NamenodeRegistration registration) + throws IOException { + BackupJournalManager bjm = this.findBackupJournal(registration); + if (bjm != null) { + LOG.info("Removing backup journal " + bjm); + journalSet.remove(bjm); } } /** * Find the JournalAndStream associated with this BackupNode. + * * @return null if it cannot be found */ - private synchronized JournalAndStream findBackupJournalAndStream( + private synchronized BackupJournalManager findBackupJournal( NamenodeRegistration bnReg) { - for (JournalAndStream jas : journals) { - if (jas.manager instanceof BackupJournalManager) { - BackupJournalManager bjm = (BackupJournalManager)jas.manager; - if (bjm.matchesRegistration(bnReg)) { - return jas; - } + for (JournalManager bjm : journalSet.getJournalManagers()) { + if ((bjm instanceof BackupJournalManager) + && ((BackupJournalManager) bjm).matchesRegistration(bnReg)) { + return (BackupJournalManager) bjm; } } return null; @@ -1018,124 +890,24 @@ private synchronized JournalAndStream findBackupJournalAndStream( */ synchronized void logEdit(final int length, final byte[] data) { long start = beginTransaction(); - - mapJournalsAndReportErrors(new JournalClosure() { - @Override - public void apply(JournalAndStream jas) throws IOException { - if (jas.isActive()) { - jas.getCurrentStream().writeRaw(data, 0, length); // TODO writeRaw - } - } - }, "Logging edit"); + try { + editLogStream.writeRaw(data, 0, length); + } catch (IOException ex) { + // All journals have failed, it will be handled in logSync. + } endTransaction(start); } - //// Iteration across journals - private interface JournalClosure { - public void apply(JournalAndStream jas) throws IOException; - } - - /** - * Apply the given function across all of the journal managers, disabling - * any for which the closure throws an IOException. - * @param status message used for logging errors (e.g. 
"opening journal") - */ - private void mapJournalsAndReportErrors( - JournalClosure closure, String status) { - List badJAS = Lists.newLinkedList(); - for (JournalAndStream jas : journals) { - try { - closure.apply(jas); - } catch (Throwable t) { - LOG.error("Error " + status + " (journal " + jas + ")", t); - badJAS.add(jas); - } - } - - disableAndReportErrorOnJournals(badJAS); - } - - /** - * Called when some journals experience an error in some operation. - * This propagates errors to the storage level. - */ - private void disableAndReportErrorOnJournals(List badJournals) { - if (badJournals == null || badJournals.isEmpty()) { - return; // nothing to do - } - - for (JournalAndStream j : badJournals) { - LOG.error("Disabling journal " + j); - j.abort(); - } - } - - /** - * Find the best editlog input stream to read from txid. In this case - * best means the editlog which has the largest continuous range of - * transactions starting from the transaction id, fromTxId. - * - * If a journal throws an CorruptionException while reading from a txn id, - * it means that it has more transactions, but can't find any from fromTxId. - * If this is the case and no other journal has transactions, we should throw - * an exception as it means more transactions exist, we just can't load them. - * - * @param fromTxId Transaction id to start from. - * @return a edit log input stream with tranactions fromTxId - * or null if no more exist - */ - private EditLogInputStream selectStream(long fromTxId) - throws IOException { - JournalManager bestjm = null; - long bestjmNumTxns = 0; - CorruptionException corruption = null; - - for (JournalAndStream jas : journals) { - JournalManager candidate = jas.getManager(); - long candidateNumTxns = 0; - try { - candidateNumTxns = candidate.getNumberOfTransactions(fromTxId); - } catch (CorruptionException ce) { - corruption = ce; - } catch (IOException ioe) { - LOG.warn("Error reading number of transactions from " + candidate); - continue; // error reading disk, just skip - } - - if (candidateNumTxns > bestjmNumTxns) { - bestjm = candidate; - bestjmNumTxns = candidateNumTxns; - } - } - - - if (bestjm == null) { - /** - * If all candidates either threw a CorruptionException or - * found 0 transactions, then a gap exists. - */ - if (corruption != null) { - throw new IOException("Gap exists in logs from " - + fromTxId, corruption); - } else { - return null; - } - } - - return bestjm.getInputStream(fromTxId); - } - /** * Run recovery on all journals to recover any unclosed segments */ void recoverUnclosedStreams() { - mapJournalsAndReportErrors(new JournalClosure() { - @Override - public void apply(JournalAndStream jas) throws IOException { - jas.manager.recoverUnfinalizedSegments(); - } - }, "recovering unclosed streams"); + try { + journalSet.recoverUnfinalizedSegments(); + } catch (IOException ex) { + // All journals have failed, it is handled in logSync. 
+ } } /** @@ -1143,23 +915,16 @@ public void apply(JournalAndStream jas) throws IOException { * @param fromTxId first transaction in the selected streams * @param toAtLeast the selected streams must contain this transaction */ - Collection selectInputStreams(long fromTxId, long toAtLeastTxId) - throws IOException { - List streams = Lists.newArrayList(); - - boolean gapFound = false; - EditLogInputStream stream = selectStream(fromTxId); + Collection selectInputStreams(long fromTxId, + long toAtLeastTxId) throws IOException { + List streams = new ArrayList(); + EditLogInputStream stream = journalSet.getInputStream(fromTxId); while (stream != null) { fromTxId = stream.getLastTxId() + 1; streams.add(stream); - try { - stream = selectStream(fromTxId); - } catch (IOException ioe) { - gapFound = true; - break; - } + stream = journalSet.getInputStream(fromTxId); } - if (fromTxId <= toAtLeastTxId || gapFound) { + if (fromTxId <= toAtLeastTxId) { closeAllStreams(streams); throw new IOException("No non-corrupt logs for txid " + fromTxId); @@ -1176,75 +941,4 @@ static void closeAllStreams(Iterable streams) { IOUtils.closeStream(s); } } - - /** - * Container for a JournalManager paired with its currently - * active stream. - * - * If a Journal gets disabled due to an error writing to its - * stream, then the stream will be aborted and set to null. - */ - static class JournalAndStream { - private final JournalManager manager; - private EditLogOutputStream stream; - private long segmentStartsAtTxId = HdfsConstants.INVALID_TXID; - - private JournalAndStream(JournalManager manager) { - this.manager = manager; - } - - private void startLogSegment(long txId) throws IOException { - Preconditions.checkState(stream == null); - stream = manager.startLogSegment(txId); - segmentStartsAtTxId = txId; - } - - private void close(long lastTxId) throws IOException { - Preconditions.checkArgument(lastTxId >= segmentStartsAtTxId, - "invalid segment: lastTxId %s >= " + - "segment starting txid %s", lastTxId, segmentStartsAtTxId); - - if (stream == null) return; - stream.close(); - manager.finalizeLogSegment(segmentStartsAtTxId, lastTxId); - stream = null; - } - - @VisibleForTesting - void abort() { - if (stream == null) return; - try { - stream.abort(); - } catch (IOException ioe) { - LOG.error("Unable to abort stream " + stream, ioe); - } - stream = null; - segmentStartsAtTxId = HdfsConstants.INVALID_TXID; - } - - private boolean isActive() { - return stream != null; - } - - @VisibleForTesting - EditLogOutputStream getCurrentStream() { - return stream; - } - - @Override - public String toString() { - return "JournalAndStream(mgr=" + manager + - ", " + "stream=" + stream + ")"; - } - - @VisibleForTesting - void setCurrentStreamForTests(EditLogOutputStream stream) { - this.stream = stream; - } - - @VisibleForTesting - JournalManager getManager() { - return manager; - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 6e4c17161a..8cfc975823 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -23,7 +23,6 @@ import java.io.File; import java.io.IOException; import java.util.List; -import java.util.HashMap; import java.util.Comparator; import 
java.util.Collections; import java.util.regex.Matcher; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java index 8440fe049b..0bb7b0f8aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java @@ -19,7 +19,6 @@ import java.io.IOException; -import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger; /** * A JournalManager is responsible for managing a single place of storing diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java new file mode 100644 index 0000000000..0d6bc743da --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java @@ -0,0 +1,549 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.SortedSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; +import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableListMultimap; +import com.google.common.collect.Lists; +import com.google.common.collect.Multimaps; +import com.google.common.collect.Sets; + +/** + * Manages a collection of Journals. None of the methods are synchronized, it is + * assumed that FSEditLog methods, that use this class, use proper + * synchronization. + */ +public class JournalSet implements JournalManager { + + static final Log LOG = LogFactory.getLog(FSEditLog.class); + + /** + * Container for a JournalManager paired with its currently + * active stream. + * + * If a Journal gets disabled due to an error writing to its + * stream, then the stream will be aborted and set to null. + * + * This should be used outside JournalSet only for testing. 
+ */ + @VisibleForTesting + static class JournalAndStream { + private final JournalManager journal; + private boolean disabled = false; + private EditLogOutputStream stream; + + public JournalAndStream(JournalManager manager) { + this.journal = manager; + } + + public void startLogSegment(long txId) throws IOException { + Preconditions.checkState(stream == null); + disabled = false; + stream = journal.startLogSegment(txId); + } + + /** + * Closes the stream, also sets it to null. + */ + public void close() throws IOException { + if (stream == null) return; + stream.close(); + stream = null; + } + + /** + * Aborts the stream, also sets it to null. + */ + public void abort() { + if (stream == null) return; + try { + stream.abort(); + } catch (IOException ioe) { + LOG.error("Unable to abort stream " + stream, ioe); + } + stream = null; + } + + boolean isActive() { + return stream != null; + } + + /** + * Should be used outside JournalSet only for testing. + */ + EditLogOutputStream getCurrentStream() { + return stream; + } + + @Override + public String toString() { + return "JournalAndStream(mgr=" + journal + + ", " + "stream=" + stream + ")"; + } + + void setCurrentStreamForTests(EditLogOutputStream stream) { + this.stream = stream; + } + + JournalManager getManager() { + return journal; + } + + private boolean isDisabled() { + return disabled; + } + + private void setDisabled(boolean disabled) { + this.disabled = disabled; + } + } + + private List journals = Lists.newArrayList(); + + @Override + public EditLogOutputStream startLogSegment(final long txId) throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + jas.startLogSegment(txId); + } + }, "starting log segment " + txId); + return new JournalSetOutputStream(); + } + + @Override + public void finalizeLogSegment(final long firstTxId, final long lastTxId) + throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + if (jas.isActive()) { + jas.close(); + jas.getManager().finalizeLogSegment(firstTxId, lastTxId); + } + } + }, "finalize log segment " + firstTxId + ", " + lastTxId); + } + + + /** + * Find the best editlog input stream to read from txid. + * If a journal throws an CorruptionException while reading from a txn id, + * it means that it has more transactions, but can't find any from fromTxId. + * If this is the case and no other journal has transactions, we should throw + * an exception as it means more transactions exist, we just can't load them. + * + * @param fromTxnId Transaction id to start from. 
+ * @return A edit log input stream with tranactions fromTxId + * or null if no more exist + */ + @Override + public EditLogInputStream getInputStream(long fromTxnId) throws IOException { + JournalManager bestjm = null; + long bestjmNumTxns = 0; + CorruptionException corruption = null; + + for (JournalAndStream jas : journals) { + JournalManager candidate = jas.getManager(); + long candidateNumTxns = 0; + try { + candidateNumTxns = candidate.getNumberOfTransactions(fromTxnId); + } catch (CorruptionException ce) { + corruption = ce; + } catch (IOException ioe) { + continue; // error reading disk, just skip + } + + if (candidateNumTxns > bestjmNumTxns) { + bestjm = candidate; + bestjmNumTxns = candidateNumTxns; + } + } + + if (bestjm == null) { + if (corruption != null) { + throw new IOException("No non-corrupt logs for txid " + + fromTxnId, corruption); + } else { + return null; + } + } + return bestjm.getInputStream(fromTxnId); + } + + @Override + public long getNumberOfTransactions(long fromTxnId) throws IOException { + long num = 0; + for (JournalAndStream jas: journals) { + if (jas.isActive()) { + long newNum = jas.getManager().getNumberOfTransactions(fromTxnId); + if (newNum > num) { + num = newNum; + } + } + } + return num; + } + + /** + * Returns true if there are no journals or all are disabled. + * @return True if no journals or all are disabled. + */ + public boolean isEmpty() { + for (JournalAndStream jas : journals) { + if (!jas.isDisabled()) { + return false; + } + } + return true; + } + + /** + * Called when some journals experience an error in some operation. + */ + private void disableAndReportErrorOnJournals(List badJournals) { + if (badJournals == null || badJournals.isEmpty()) { + return; // nothing to do + } + + for (JournalAndStream j : badJournals) { + LOG.error("Disabling journal " + j); + j.abort(); + j.setDisabled(true); + } + } + + /** + * Implementations of this interface encapsulate operations that can be + * iteratively applied on all the journals. For example see + * {@link JournalSet#mapJournalsAndReportErrors}. + */ + private interface JournalClosure { + /** + * The operation on JournalAndStream. + * @param jas Object on which operations are performed. + * @throws IOException + */ + public void apply(JournalAndStream jas) throws IOException; + } + + /** + * Apply the given operation across all of the journal managers, disabling + * any for which the closure throws an IOException. + * @param closure {@link JournalClosure} object encapsulating the operation. + * @param status message used for logging errors (e.g. "opening journal") + * @throws IOException If the operation fails on all the journals. + */ + private void mapJournalsAndReportErrors( + JournalClosure closure, String status) throws IOException{ + List badJAS = Lists.newLinkedList(); + for (JournalAndStream jas : journals) { + try { + closure.apply(jas); + } catch (Throwable t) { + LOG.error("Error: " + status + " failed for (journal " + jas + ")", t); + badJAS.add(jas); + } + } + disableAndReportErrorOnJournals(badJAS); + if (badJAS.size() >= journals.size()) { + LOG.error("Error: "+status+" failed for all journals"); + throw new IOException(status+" failed on all the journals"); + } + } + + /** + * An implementation of EditLogOutputStream that applies a requested method on + * all the journals that are currently active. 
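mapJournalsAndReportErrors() above is the heart of the new JournalSet: every operation is applied to all journals, a journal that throws is disabled rather than aborting the operation, and an IOException is raised only when no journal succeeded. A reduced sketch of that policy with invented types (the real code works on JournalAndStream objects and logs each failure):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

class RedundantJournals<J> {
  interface Op<J> { void apply(J journal) throws IOException; }

  private final List<J> journals = new ArrayList<>();
  private final List<J> disabled = new ArrayList<>();

  void add(J journal) { journals.add(journal); }

  // Apply op to every enabled journal; disable failures; fail only if all have failed.
  void applyToAll(Op<J> op, String status) throws IOException {
    List<J> bad = new ArrayList<>();
    for (J j : journals) {
      if (disabled.contains(j)) continue;
      try {
        op.apply(j);
      } catch (Throwable t) {
        bad.add(j);                 // remember the journal that failed
      }
    }
    disabled.addAll(bad);           // failed journals stop receiving edits
    if (disabled.size() >= journals.size()) {
      throw new IOException(status + " failed on all the journals");
    }
  }
}

FSEditLog then needs only one call per operation; when the IOException does surface it means every journal is gone, which is why the callers shown earlier swallow it at the call site and let logSync() escalate.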
+ */ + private class JournalSetOutputStream extends EditLogOutputStream { + + JournalSetOutputStream() throws IOException { + super(); + } + + @Override + void write(final FSEditLogOp op) + throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + if (jas.isActive()) { + jas.getCurrentStream().write(op); + } + } + }, "write op"); + } + + @Override + void writeRaw(final byte[] data, final int offset, final int length) + throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + if (jas.isActive()) { + jas.getCurrentStream().writeRaw(data, offset, length); + } + } + }, "write bytes"); + } + + @Override + void create() throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + if (jas.isActive()) { + jas.getCurrentStream().create(); + } + } + }, "create"); + } + + @Override + public void close() throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + jas.close(); + } + }, "close"); + } + + @Override + public void abort() throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + jas.abort(); + } + }, "abort"); + } + + @Override + void setReadyToFlush() throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + if (jas.isActive()) { + jas.getCurrentStream().setReadyToFlush(); + } + } + }, "setReadyToFlush"); + } + + @Override + protected void flushAndSync() throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + if (jas.isActive()) { + jas.getCurrentStream().flushAndSync(); + } + } + }, "flushAndSync"); + } + + @Override + public void flush() throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + if (jas.isActive()) { + jas.getCurrentStream().flush(); + } + } + }, "flush"); + } + + @Override + public boolean shouldForceSync() { + for (JournalAndStream js : journals) { + if (js.isActive() && js.getCurrentStream().shouldForceSync()) { + return true; + } + } + return false; + } + + @Override + protected long getNumSync() { + for (JournalAndStream jas : journals) { + if (jas.isActive()) { + return jas.getCurrentStream().getNumSync(); + } + } + return 0; + } + } + + @Override + public void setOutputBufferCapacity(final int size) { + try { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + jas.getManager().setOutputBufferCapacity(size); + } + }, "setOutputBufferCapacity"); + } catch (IOException e) { + LOG.error("Error in setting outputbuffer capacity"); + } + } + + @VisibleForTesting + List getAllJournalStreams() { + return journals; + } + + List getJournalManagers() { + List jList = new ArrayList(); + for (JournalAndStream j : journals) { + jList.add(j.getManager()); + } + return jList; + } + + void add(JournalManager j) { + journals.add(new JournalAndStream(j)); + } + + void remove(JournalManager j) { + JournalAndStream jasToRemove = null; + for (JournalAndStream jas: journals) { + if 
(jas.getManager().equals(j)) { + jasToRemove = jas; + break; + } + } + if (jasToRemove != null) { + jasToRemove.abort(); + journals.remove(jasToRemove); + } + } + + @Override + public void purgeLogsOlderThan(final long minTxIdToKeep) throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + jas.getManager().purgeLogsOlderThan(minTxIdToKeep); + } + }, "purgeLogsOlderThan " + minTxIdToKeep); + } + + @Override + public void recoverUnfinalizedSegments() throws IOException { + mapJournalsAndReportErrors(new JournalClosure() { + @Override + public void apply(JournalAndStream jas) throws IOException { + jas.getManager().recoverUnfinalizedSegments(); + } + }, "recoverUnfinalizedSegments"); + } + + /** + * Return a manifest of what finalized edit logs are available. All available + * edit logs are returned starting from the transaction id passed. + * + * @param fromTxId Starting transaction id to read the logs. + * @return RemoteEditLogManifest object. + */ + public synchronized RemoteEditLogManifest getEditLogManifest(long fromTxId) { + // Collect RemoteEditLogs available from each FileJournalManager + List allLogs = Lists.newArrayList(); + for (JournalAndStream j : journals) { + if (j.getManager() instanceof FileJournalManager) { + FileJournalManager fjm = (FileJournalManager)j.getManager(); + try { + allLogs.addAll(fjm.getRemoteEditLogs(fromTxId)); + } catch (Throwable t) { + LOG.warn("Cannot list edit logs in " + fjm, t); + } + } + } + + // Group logs by their starting txid + ImmutableListMultimap logsByStartTxId = + Multimaps.index(allLogs, RemoteEditLog.GET_START_TXID); + long curStartTxId = fromTxId; + + List logs = Lists.newArrayList(); + while (true) { + ImmutableList logGroup = logsByStartTxId.get(curStartTxId); + if (logGroup.isEmpty()) { + // we have a gap in logs - for example because we recovered some old + // storage directory with ancient logs. Clear out any logs we've + // accumulated so far, and then skip to the next segment of logs + // after the gap. + SortedSet startTxIds = Sets.newTreeSet(logsByStartTxId.keySet()); + startTxIds = startTxIds.tailSet(curStartTxId); + if (startTxIds.isEmpty()) { + break; + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Found gap in logs at " + curStartTxId + ": " + + "not returning previous logs in manifest."); + } + logs.clear(); + curStartTxId = startTxIds.first(); + continue; + } + } + + // Find the one that extends the farthest forward + RemoteEditLog bestLog = Collections.max(logGroup); + logs.add(bestLog); + // And then start looking from after that point + curStartTxId = bestLog.getEndTxId() + 1; + } + RemoteEditLogManifest ret = new RemoteEditLogManifest(logs); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated manifest for logs since " + fromTxId + ":" + + ret); + } + return ret; + } + + /** + * Add sync times to the buffer. 
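getEditLogManifest() above merges the finalized segments reported by every FileJournalManager into one manifest: group segments by starting txid, at each step keep the segment that extends farthest, continue from its end txid plus one, and drop everything gathered so far when a gap is found. A plain-JDK sketch of that selection loop, using an invented Segment record instead of RemoteEditLog:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.TreeMap;

public class ManifestSelection {
  // A finalized log segment [start, end], standing in for RemoteEditLog.
  record Segment(long start, long end) {}

  // Pick one segment per starting txid, always the one reaching farthest,
  // and restart past any gap in the available logs.
  static List<Segment> select(List<Segment> all, long fromTxId) {
    TreeMap<Long, List<Segment>> byStart = new TreeMap<>();
    for (Segment s : all) {
      byStart.computeIfAbsent(s.start(), k -> new ArrayList<>()).add(s);
    }
    List<Segment> manifest = new ArrayList<>();
    long cur = fromTxId;
    while (true) {
      List<Segment> group = byStart.getOrDefault(cur, List.of());
      if (group.isEmpty()) {
        Long next = byStart.ceilingKey(cur);
        if (next == null) break;     // no more segments at or after cur
        manifest.clear();            // gap: discard what was gathered so far
        cur = next;
        continue;
      }
      Segment best = group.stream()
          .max(Comparator.comparingLong(Segment::end)).get();
      manifest.add(best);
      cur = best.end() + 1;          // continue right after the best segment
    }
    return manifest;
  }

  public static void main(String[] args) {
    List<Segment> logs = List.of(new Segment(1, 10), new Segment(1, 7),
        new Segment(11, 20), new Segment(31, 40));
    // prints [Segment[start=31, end=40]]: the gap at txid 21 discards the earlier segments
    System.out.println(select(logs, 1));
  }
}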
+ */ + String getSyncTimes() { + StringBuilder buf = new StringBuilder(); + for (JournalAndStream jas : journals) { + if (jas.isActive()) { + buf.append(jas.getCurrentStream().getTotalSyncTime()); + buf.append(" "); + } + } + return buf.toString(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 3e4eaf5f9d..5b0d7a79cf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -547,7 +547,7 @@ public void concat(String trg, String[] src) throws IOException { } @Override // ClientProtocol - public void rename(String src, String dst, Options.Rename... options) + public void rename2(String src, String dst, Options.Rename... options) throws IOException { nn.checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { @@ -561,13 +561,6 @@ public void rename(String src, String dst, Options.Rename... options) metrics.incrFilesRenamed(); } - @Deprecated - @Override // ClientProtocol - public boolean delete(String src) throws IOException { - nn.checkOperation(OperationCategory.WRITE); - return delete(src, true); - } - @Override // ClientProtocol public boolean delete(String src, boolean recursive) throws IOException { nn.checkOperation(OperationCategory.WRITE); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 2dd1db3341..8e0b6b091e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -46,6 +46,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.Options; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DirectoryListing; @@ -112,7 +113,9 @@ public static String getRemoteAddress() { private static DatanodeInfo chooseDatanode(final NameNode namenode, final String path, final HttpOpParam.Op op, final long openOffset ) throws IOException { - if (op == GetOpParam.Op.OPEN || op == PostOpParam.Op.APPEND) { + if (op == GetOpParam.Op.OPEN + || op == GetOpParam.Op.GETFILECHECKSUM + || op == PostOpParam.Op.APPEND) { final NamenodeProtocols np = namenode.getRpcServer(); final HdfsFileStatus status = np.getFileInfo(path); final long len = status.getLen(); @@ -245,7 +248,7 @@ public Response run() throws IOException, URISyntaxException { case MKDIRS: { final boolean b = np.mkdirs(fullpath, permission.getFsPermission(), true); - final String js = JsonUtil.toJsonString(PutOpParam.Op.MKDIRS, b); + final String js = JsonUtil.toJsonString("boolean", b); return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); } case RENAME: @@ -254,10 +257,10 @@ public Response run() throws IOException, URISyntaxException { if (s.isEmpty()) { @SuppressWarnings("deprecation") final boolean b = np.rename(fullpath, dstPath.getValue()); - final String js = 
JsonUtil.toJsonString(PutOpParam.Op.RENAME, b); + final String js = JsonUtil.toJsonString("boolean", b); return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); } else { - np.rename(fullpath, dstPath.getValue(), + np.rename2(fullpath, dstPath.getValue(), s.toArray(new Options.Rename[s.size()])); return Response.ok().type(MediaType.APPLICATION_JSON).build(); } @@ -265,7 +268,7 @@ public Response run() throws IOException, URISyntaxException { case SETREPLICATION: { final boolean b = np.setReplication(fullpath, replication.getValue()); - final String js = JsonUtil.toJsonString(PutOpParam.Op.SETREPLICATION, b); + final String js = JsonUtil.toJsonString("boolean", b); return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); } case SETOWNER: @@ -431,6 +434,18 @@ public Response run() throws IOException, URISyntaxException { final StreamingOutput streaming = getListingStream(np, fullpath); return Response.ok(streaming).type(MediaType.APPLICATION_JSON).build(); } + case GETCONTENTSUMMARY: + { + final ContentSummary contentsummary = np.getContentSummary(fullpath); + final String js = JsonUtil.toJsonString(contentsummary); + return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); + } + case GETFILECHECKSUM: + { + final URI uri = redirectURI(namenode, ugi, delegation, fullpath, + op.getValue(), -1L); + return Response.temporaryRedirect(uri).build(); + } case GETDELEGATIONTOKEN: { final Token token = generateDelegationToken( @@ -467,7 +482,7 @@ private static StreamingOutput getListingStream(final NamenodeProtocols np, @Override public void write(final OutputStream outstream) throws IOException { final PrintStream out = new PrintStream(outstream); - out.print('['); + out.println("{\"" + HdfsFileStatus[].class.getSimpleName() + "\":["); final HdfsFileStatus[] partial = first.getPartialListing(); if (partial.length > 0) { @@ -486,7 +501,7 @@ public void write(final OutputStream outstream) throws IOException { } } - out.println(']'); + out.println("]}"); } }; } @@ -522,7 +537,7 @@ public Response run() throws IOException { case DELETE: { final boolean b = namenode.getRpcServer().delete(fullpath, recursive.getValue()); - final String js = JsonUtil.toJsonString(DeleteOpParam.Op.DELETE, b); + final String js = JsonUtil.toJsonString("boolean", b); return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); } default: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index 314d53b38f..adf639c32b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.web; +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -24,6 +26,8 @@ import java.util.Map; import java.util.TreeMap; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -34,45 +38,33 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import 
org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.StringUtils; import org.mortbay.util.ajax.JSON; /** JSON Utilities */ public class JsonUtil { - private static class ThreadLocalMap extends ThreadLocal> { - @Override - protected Map initialValue() { - return new TreeMap(); - } - - @Override - public Map get() { - final Map m = super.get(); - m.clear(); - return m; - } - } - - private static final ThreadLocalMap jsonMap = new ThreadLocalMap(); - private static final ThreadLocalMap tokenMap = new ThreadLocalMap(); - private static final ThreadLocalMap datanodeInfoMap = new ThreadLocalMap(); - private static final ThreadLocalMap extendedBlockMap = new ThreadLocalMap(); - private static final ThreadLocalMap locatedBlockMap = new ThreadLocalMap(); - + private static final Object[] EMPTY_OBJECT_ARRAY = {}; private static final DatanodeInfo[] EMPTY_DATANODE_INFO_ARRAY = {}; /** Convert a token object to a Json string. */ public static String toJsonString(final Token token ) throws IOException { + return toJsonString(Token.class, toJsonMap(token)); + } + + private static Map toJsonMap( + final Token token) throws IOException { if (token == null) { return null; } - final Map m = tokenMap.get(); + final Map m = new TreeMap(); m.put("urlString", token.encodeToUrlString()); - return JSON.toString(m); + return m; } /** Convert a Json map to a Token. */ @@ -91,46 +83,52 @@ public static Token toToken( /** Convert a Json map to a Token of DelegationTokenIdentifier. */ @SuppressWarnings("unchecked") public static Token toDelegationToken( - final Map m) throws IOException { + final Map json) throws IOException { + final Map m = (Map)json.get(Token.class.getSimpleName()); return (Token)toToken(m); } /** Convert a Json map to a Token of BlockTokenIdentifier. */ @SuppressWarnings("unchecked") - public static Token toBlockToken( + private static Token toBlockToken( final Map m) throws IOException { return (Token)toToken(m); } /** Convert an exception object to a Json string. */ public static String toJsonString(final Exception e) { - final Map m = jsonMap.get(); + final Map m = new TreeMap(); m.put("className", e.getClass().getName()); m.put("message", e.getMessage()); - return JSON.toString(m); + return toJsonString(RemoteException.class, m); } /** Convert a Json map to a RemoteException. */ - public static RemoteException toRemoteException(final Map m) { + public static RemoteException toRemoteException(final Map json) { + final Map m = (Map)json.get(RemoteException.class.getSimpleName()); final String className = (String)m.get("className"); final String message = (String)m.get("message"); return new RemoteException(className, message); } + private static String toJsonString(final Class clazz, final Object value) { + return toJsonString(clazz.getSimpleName(), value); + } + /** Convert a key-value pair to a Json string. */ - public static String toJsonString(final Object key, final Object value) { - final Map m = jsonMap.get(); - m.put(key instanceof String ? (String) key : key.toString(), value); + public static String toJsonString(final String key, final Object value) { + final Map m = new TreeMap(); + m.put(key, value); return JSON.toString(m); } /** Convert a FsPermission object to a string. 
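The JsonUtil rework in this hunk drops the per-thread maps in favour of freshly built TreeMaps and settles on one response shape: every payload is nested under a single top-level key, either a type's simple name (RemoteException, HdfsFileStatus, ContentSummary, ...) or the literal key "boolean" for the true/false operations, and clients unwrap with the same key before reading fields. A minimal sketch of that convention using plain java.util maps; the field values are made up for illustration:

import java.util.Map;
import java.util.TreeMap;

public class ResponseShapeDemo {
  // Wrap a payload under a single top-level key, like toJsonString(key, value).
  static Map<String, Object> wrap(String key, Object value) {
    Map<String, Object> m = new TreeMap<>();
    m.put(key, value);
    return m;
  }

  public static void main(String[] args) {
    // Boolean results are returned as {"boolean": true}
    Map<String, Object> deleted = wrap("boolean", true);

    // Structured results are nested under the type's simple name, e.g.
    // {"ContentSummary": {"fileCount": ..., "length": ..., ...}}
    Map<String, Object> summary = new TreeMap<>();
    summary.put("length", 1024L);
    summary.put("fileCount", 3L);
    Map<String, Object> wrapped = wrap("ContentSummary", summary);

    // A client unwraps with the same key before reading individual fields.
    Map<String, Object> inner = (Map<String, Object>) wrapped.get("ContentSummary");
    System.out.println(deleted + " / length=" + inner.get("length"));
  }
}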
*/ - public static String toString(final FsPermission permission) { + private static String toString(final FsPermission permission) { return String.format("%o", permission.toShort()); } /** Convert a string to a FsPermission object. */ - public static FsPermission toFsPermission(final String s) { + private static FsPermission toFsPermission(final String s) { return new FsPermission(Short.parseShort(s, 8)); } @@ -139,7 +137,7 @@ public static String toJsonString(final HdfsFileStatus status) { if (status == null) { return null; } else { - final Map m = jsonMap.get(); + final Map m = new TreeMap(); m.put("localName", status.getLocalName()); m.put("isDir", status.isDir()); m.put("isSymlink", status.isSymlink()); @@ -155,21 +153,17 @@ public static String toJsonString(final HdfsFileStatus status) { m.put("modificationTime", status.getModificationTime()); m.put("blockSize", status.getBlockSize()); m.put("replication", status.getReplication()); - return JSON.toString(m); + return toJsonString(HdfsFileStatus.class, m); } } - @SuppressWarnings("unchecked") - static Map parse(String jsonString) { - return (Map) JSON.parse(jsonString); - } - /** Convert a Json map to a HdfsFileStatus object. */ - public static HdfsFileStatus toFileStatus(final Map m) { - if (m == null) { + public static HdfsFileStatus toFileStatus(final Map json) { + if (json == null) { return null; } + final Map m = (Map)json.get(HdfsFileStatus.class.getSimpleName()); final String localName = (String) m.get("localName"); final boolean isDir = (Boolean) m.get("isDir"); final boolean isSymlink = (Boolean) m.get("isSymlink"); @@ -189,22 +183,22 @@ public static HdfsFileStatus toFileStatus(final Map m) { symlink, DFSUtil.string2Bytes(localName)); } - /** Convert a LocatedBlock to a Json string. */ - public static String toJsonString(final ExtendedBlock extendedblock) { + /** Convert an ExtendedBlock to a Json map. */ + private static Map toJsonMap(final ExtendedBlock extendedblock) { if (extendedblock == null) { return null; } - final Map m = extendedBlockMap.get(); + final Map m = new TreeMap(); m.put("blockPoolId", extendedblock.getBlockPoolId()); m.put("blockId", extendedblock.getBlockId()); m.put("numBytes", extendedblock.getNumBytes()); m.put("generationStamp", extendedblock.getGenerationStamp()); - return JSON.toString(m); + return m; } /** Convert a Json map to an ExtendedBlock object. */ - public static ExtendedBlock toExtendedBlock(final Map m) { + private static ExtendedBlock toExtendedBlock(final Map m) { if (m == null) { return null; } @@ -216,13 +210,13 @@ public static ExtendedBlock toExtendedBlock(final Map m) { return new ExtendedBlock(blockPoolId, blockId, numBytes, generationStamp); } - /** Convert a DatanodeInfo to a Json string. */ - public static String toJsonString(final DatanodeInfo datanodeinfo) { + /** Convert a DatanodeInfo to a Json map. */ + private static Map toJsonMap(final DatanodeInfo datanodeinfo) { if (datanodeinfo == null) { return null; } - final Map m = datanodeInfoMap.get(); + final Map m = new TreeMap(); m.put("name", datanodeinfo.getName()); m.put("storageID", datanodeinfo.getStorageID()); m.put("infoPort", datanodeinfo.getInfoPort()); @@ -238,11 +232,11 @@ public static String toJsonString(final DatanodeInfo datanodeinfo) { m.put("networkLocation", datanodeinfo.getNetworkLocation()); m.put("hostName", datanodeinfo.getHostName()); m.put("adminState", datanodeinfo.getAdminState().name()); - return JSON.toString(m); + return m; } /** Convert a Json map to an DatanodeInfo object. 
*/ - public static DatanodeInfo toDatanodeInfo(final Map m) { + private static DatanodeInfo toDatanodeInfo(final Map m) { if (m == null) { return null; } @@ -264,25 +258,23 @@ public static DatanodeInfo toDatanodeInfo(final Map m) { AdminStates.valueOf((String)m.get("adminState"))); } - /** Convert a DatanodeInfo[] to a Json string. */ - public static String toJsonString(final DatanodeInfo[] array - ) throws IOException { + /** Convert a DatanodeInfo[] to a Json array. */ + private static Object[] toJsonArray(final DatanodeInfo[] array) { if (array == null) { return null; } else if (array.length == 0) { - return "[]"; + return EMPTY_OBJECT_ARRAY; } else { - final StringBuilder b = new StringBuilder().append('[').append( - toJsonString(array[0])); - for(int i = 1; i < array.length; i++) { - b.append(", ").append(toJsonString(array[i])); + final Object[] a = new Object[array.length]; + for(int i = 0; i < array.length; i++) { + a[i] = toJsonMap(array[i]); } - return b.append(']').toString(); + return a; } } /** Convert an Object[] to a DatanodeInfo[]. */ - public static DatanodeInfo[] toDatanodeInfoArray(final Object[] objects) { + private static DatanodeInfo[] toDatanodeInfoArray(final Object[] objects) { if (objects == null) { return null; } else if (objects.length == 0) { @@ -290,66 +282,63 @@ public static DatanodeInfo[] toDatanodeInfoArray(final Object[] objects) { } else { final DatanodeInfo[] array = new DatanodeInfo[objects.length]; for(int i = 0; i < array.length; i++) { - array[i] = (DatanodeInfo)toDatanodeInfo((Map) objects[i]); + array[i] = toDatanodeInfo((Map) objects[i]); } return array; } } - /** Convert a LocatedBlock to a Json string. */ - public static String toJsonString(final LocatedBlock locatedblock + /** Convert a LocatedBlock to a Json map. */ + private static Map toJsonMap(final LocatedBlock locatedblock ) throws IOException { if (locatedblock == null) { return null; } - final Map m = locatedBlockMap.get(); - m.put("blockToken", toJsonString(locatedblock.getBlockToken())); + final Map m = new TreeMap(); + m.put("blockToken", toJsonMap(locatedblock.getBlockToken())); m.put("isCorrupt", locatedblock.isCorrupt()); m.put("startOffset", locatedblock.getStartOffset()); - m.put("block", toJsonString(locatedblock.getBlock())); - - m.put("locations", toJsonString(locatedblock.getLocations())); - return JSON.toString(m); + m.put("block", toJsonMap(locatedblock.getBlock())); + m.put("locations", toJsonArray(locatedblock.getLocations())); + return m; } /** Convert a Json map to LocatedBlock. */ - public static LocatedBlock toLocatedBlock(final Map m) throws IOException { + private static LocatedBlock toLocatedBlock(final Map m) throws IOException { if (m == null) { return null; } - final ExtendedBlock b = toExtendedBlock((Map)JSON.parse((String)m.get("block"))); + final ExtendedBlock b = toExtendedBlock((Map)m.get("block")); final DatanodeInfo[] locations = toDatanodeInfoArray( - (Object[])JSON.parse((String)m.get("locations"))); + (Object[])m.get("locations")); final long startOffset = (Long)m.get("startOffset"); final boolean isCorrupt = (Boolean)m.get("isCorrupt"); final LocatedBlock locatedblock = new LocatedBlock(b, locations, startOffset, isCorrupt); - locatedblock.setBlockToken(toBlockToken((Map)JSON.parse((String)m.get("blockToken")))); + locatedblock.setBlockToken(toBlockToken((Map)m.get("blockToken"))); return locatedblock; } - /** Convert a LocatedBlock[] to a Json string. 
*/ - public static String toJsonString(final List array + /** Convert a LocatedBlock[] to a Json array. */ + private static Object[] toJsonArray(final List array ) throws IOException { if (array == null) { return null; } else if (array.size() == 0) { - return "[]"; + return EMPTY_OBJECT_ARRAY; } else { - final StringBuilder b = new StringBuilder().append('[').append( - toJsonString(array.get(0))); - for(int i = 1; i < array.size(); i++) { - b.append(",\n ").append(toJsonString(array.get(i))); + final Object[] a = new Object[array.size()]; + for(int i = 0; i < array.size(); i++) { + a[i] = toJsonMap(array.get(0)); } - return b.append(']').toString(); + return a; } } - /** Convert an Object[] to a List of LocatedBlock. - * @throws IOException */ - public static List toLocatedBlockList(final Object[] objects + /** Convert an Object[] to a List of LocatedBlock. */ + private static List toLocatedBlockList(final Object[] objects ) throws IOException { if (objects == null) { return null; @@ -358,7 +347,7 @@ public static List toLocatedBlockList(final Object[] objects } else { final List list = new ArrayList(objects.length); for(int i = 0; i < objects.length; i++) { - list.add((LocatedBlock)toLocatedBlock((Map)objects[i])); + list.add(toLocatedBlock((Map)objects[i])); } return list; } @@ -371,31 +360,115 @@ public static String toJsonString(final LocatedBlocks locatedblocks return null; } - final Map m = jsonMap.get(); + final Map m = new TreeMap(); m.put("fileLength", locatedblocks.getFileLength()); m.put("isUnderConstruction", locatedblocks.isUnderConstruction()); - m.put("locatedBlocks", toJsonString(locatedblocks.getLocatedBlocks())); - m.put("lastLocatedBlock", toJsonString(locatedblocks.getLastLocatedBlock())); + m.put("locatedBlocks", toJsonArray(locatedblocks.getLocatedBlocks())); + m.put("lastLocatedBlock", toJsonMap(locatedblocks.getLastLocatedBlock())); m.put("isLastBlockComplete", locatedblocks.isLastBlockComplete()); - return JSON.toString(m); + return toJsonString(LocatedBlocks.class, m); } /** Convert a Json map to LocatedBlock. */ - public static LocatedBlocks toLocatedBlocks(final Map m + public static LocatedBlocks toLocatedBlocks(final Map json ) throws IOException { - if (m == null) { + if (json == null) { return null; } - + + final Map m = (Map)json.get(LocatedBlocks.class.getSimpleName()); final long fileLength = (Long)m.get("fileLength"); final boolean isUnderConstruction = (Boolean)m.get("isUnderConstruction"); final List locatedBlocks = toLocatedBlockList( - (Object[])JSON.parse((String) m.get("locatedBlocks"))); + (Object[])m.get("locatedBlocks")); final LocatedBlock lastLocatedBlock = toLocatedBlock( - (Map)JSON.parse((String)m.get("lastLocatedBlock"))); + (Map)m.get("lastLocatedBlock")); final boolean isLastBlockComplete = (Boolean)m.get("isLastBlockComplete"); return new LocatedBlocks(fileLength, isUnderConstruction, locatedBlocks, lastLocatedBlock, isLastBlockComplete); } + + /** Convert a ContentSummary to a Json string. 
*/ + public static String toJsonString(final ContentSummary contentsummary) { + if (contentsummary == null) { + return null; + } + + final Map m = new TreeMap(); + m.put("length", contentsummary.getLength()); + m.put("fileCount", contentsummary.getFileCount()); + m.put("directoryCount", contentsummary.getDirectoryCount()); + m.put("quota", contentsummary.getQuota()); + m.put("spaceConsumed", contentsummary.getSpaceConsumed()); + m.put("spaceQuota", contentsummary.getSpaceQuota()); + return toJsonString(ContentSummary.class, m); + } + + /** Convert a Json map to a ContentSummary. */ + public static ContentSummary toContentSummary(final Map json) { + if (json == null) { + return null; + } + + final Map m = (Map)json.get(ContentSummary.class.getSimpleName()); + final long length = (Long)m.get("length"); + final long fileCount = (Long)m.get("fileCount"); + final long directoryCount = (Long)m.get("directoryCount"); + final long quota = (Long)m.get("quota"); + final long spaceConsumed = (Long)m.get("spaceConsumed"); + final long spaceQuota = (Long)m.get("spaceQuota"); + + return new ContentSummary(length, fileCount, directoryCount, + quota, spaceConsumed, spaceQuota); + } + + /** Convert a MD5MD5CRC32FileChecksum to a Json string. */ + public static String toJsonString(final MD5MD5CRC32FileChecksum checksum) { + if (checksum == null) { + return null; + } + + final Map m = new TreeMap(); + m.put("algorithm", checksum.getAlgorithmName()); + m.put("length", checksum.getLength()); + m.put("bytes", StringUtils.byteToHexString(checksum.getBytes())); + return toJsonString(MD5MD5CRC32FileChecksum.class, m); + } + + /** Convert a Json map to a MD5MD5CRC32FileChecksum. */ + public static MD5MD5CRC32FileChecksum toMD5MD5CRC32FileChecksum( + final Map json) throws IOException { + if (json == null) { + return null; + } + + final Map m = (Map)json.get( + MD5MD5CRC32FileChecksum.class.getSimpleName()); + final String algorithm = (String)m.get("algorithm"); + final int length = (int)(long)(Long)m.get("length"); + final byte[] bytes = StringUtils.hexStringToByte((String)m.get("bytes")); + + final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes)); + final int bytesPerCRC = in.readInt(); + final long crcPerBlock = in.readLong(); + final MD5Hash md5 = MD5Hash.read(in); + final MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum( + bytesPerCRC, crcPerBlock, md5); + + //check algorithm name + final String alg = "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32"; + if (!alg.equals(algorithm)) { + throw new IOException("Algorithm not matched: algorithm=" + algorithm + + ", crcPerBlock=" + crcPerBlock + + ", bytesPerCRC=" + bytesPerCRC); + } + //check length + if (length != checksum.getLength()) { + throw new IOException("Length not matched: length=" + length + + ", checksum.getLength()=" + checksum.getLength()); + } + + return checksum; + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index 35c325281b..27d6fe166e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -33,10 +33,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.ContentSummary; import 
org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.Path; @@ -278,7 +280,7 @@ public boolean mkdirs(Path f, FsPermission permission) throws IOException { final HttpOpParam.Op op = PutOpParam.Op.MKDIRS; final Map json = run(op, f, new PermissionParam(applyUMask(permission))); - return (Boolean)json.get(op.toString()); + return (Boolean)json.get("boolean"); } @Override @@ -287,7 +289,7 @@ public boolean rename(final Path src, final Path dst) throws IOException { final HttpOpParam.Op op = PutOpParam.Op.RENAME; final Map json = run(op, src, new DstPathParam(makeQualified(dst).toUri().getPath())); - return (Boolean)json.get(op.toString()); + return (Boolean)json.get("boolean"); } @SuppressWarnings("deprecation") @@ -327,7 +329,7 @@ public boolean setReplication(final Path p, final short replication final HttpOpParam.Op op = PutOpParam.Op.SETREPLICATION; final Map json = run(op, p, new ReplicationParam(replication)); - return (Boolean)json.get(op.toString()); + return (Boolean)json.get("boolean"); } @Override @@ -384,7 +386,7 @@ public FSDataOutputStream append(final Path f, final int bufferSize, public boolean delete(Path f, boolean recursive) throws IOException { final HttpOpParam.Op op = DeleteOpParam.Op.DELETE; final Map json = run(op, f, new RecursiveParam(recursive)); - return (Boolean)json.get(op.toString()); + return (Boolean)json.get("boolean"); } @Override @@ -401,7 +403,9 @@ public FileStatus[] listStatus(final Path f) throws IOException { statistics.incrementReadOps(1); final HttpOpParam.Op op = GetOpParam.Op.LISTSTATUS; - final Object[] array = run(op, f); + final Map json = run(op, f); + final Object[] array = (Object[])json.get( + HdfsFileStatus[].class.getSimpleName()); //convert FileStatus final FileStatus[] statuses = new FileStatus[array.length]; @@ -449,4 +453,23 @@ public BlockLocation[] getFileBlockLocations(final Path p, new LengthParam(length)); return DFSUtil.locatedBlocks2Locations(JsonUtil.toLocatedBlocks(m)); } + + @Override + public ContentSummary getContentSummary(final Path p) throws IOException { + statistics.incrementReadOps(1); + + final HttpOpParam.Op op = GetOpParam.Op.GETCONTENTSUMMARY; + final Map m = run(op, p); + return JsonUtil.toContentSummary(m); + } + + @Override + public MD5MD5CRC32FileChecksum getFileChecksum(final Path p + ) throws IOException { + statistics.incrementReadOps(1); + + final HttpOpParam.Op op = GetOpParam.Op.GETFILECHECKSUM; + final Map m = run(op, p); + return JsonUtil.toMD5MD5CRC32FileChecksum(m); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java index d547f1b1b4..cab71c99d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java @@ -28,6 +28,8 @@ public static enum Op implements HttpOpParam.Op { GETFILESTATUS(HttpURLConnection.HTTP_OK), LISTSTATUS(HttpURLConnection.HTTP_OK), + GETCONTENTSUMMARY(HttpURLConnection.HTTP_OK), + GETFILECHECKSUM(HttpURLConnection.HTTP_OK), 
GETDELEGATIONTOKEN(HttpURLConnection.HTTP_OK), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/deb/hadoop.control/preinst b/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/deb/hadoop.control/preinst index 940b7add67..13820227f3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/deb/hadoop.control/preinst +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/deb/hadoop.control/preinst @@ -15,4 +15,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -/usr/sbin/useradd --comment "Hadoop HDFS" --shell /bin/bash -M -r --groups hadoop --home /var/lib/hadoop/hdfs hdfs 2> /dev/null || : +/usr/sbin/useradd --comment "Hadoop HDFS" -u 201 --shell /bin/bash -M -r --groups hadoop --home /var/lib/hadoop/hdfs hdfs 2> /dev/null || : diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/deb/init.d/hadoop-secondarynamenode b/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/deb/init.d/hadoop-secondarynamenode new file mode 100644 index 0000000000..1b08cd38b8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/deb/init.d/hadoop-secondarynamenode @@ -0,0 +1,142 @@ +#! /bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +### BEGIN INIT INFO +# Provides: hadoop-secondarynamenode +# Required-Start: $remote_fs $syslog +# Required-Stop: $remote_fs $syslog +# Default-Start: 2 3 4 5 +# Default-Stop: +# Short-Description: Apache Hadoop Secondary Name Node server +### END INIT INFO + +set -e + +# /etc/init.d/hadoop-secondarynamenode: start and stop the Apache Hadoop Secondary Name Node daemon + +test -x /usr/bin/hadoop || exit 0 +( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0 + +umask 022 + +if test -f /etc/default/hadoop-env.sh; then + . /etc/default/hadoop-env.sh +fi + +. /lib/lsb/init-functions + +# Are we running from init? +run_by_init() { + ([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ] +} + +check_for_no_start() { + # forget it if we're trying to start, and /etc/hadoop/hadoop-secondarynamenode_not_to_be_run exists + if [ -e /etc/hadoop/hadoop-secondarynamenode_not_to_be_run ]; then + if [ "$1" = log_end_msg ]; then + log_end_msg 0 + fi + if ! run_by_init; then + log_action_msg "Apache Hadoop Secondary Name Node server not in use (/etc/hadoop/hadoop-secondarynamenode_not_to_be_run)" + fi + exit 0 + fi +} + +check_privsep_dir() { + # Create the PrivSep empty dir if necessary + if [ !
-d ${HADOOP_PID_DIR} ]; then + mkdir -p ${HADOOP_PID_DIR} + chown root:hadoop ${HADOOP_PID_DIR} + chmod 0775 ${HADOOP_PID_DIR} + fi +} + +export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin" +export HADOOP_PREFIX="/usr" + +case "$1" in + start) + check_privsep_dir + check_for_no_start + log_daemon_msg "Starting Apache Hadoop Secondary Name Node server" "hadoop-secondarynamenode" + if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-secondarynamenode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start secondarynamenode; then + log_end_msg 0 + else + log_end_msg 1 + fi + ;; + stop) + log_daemon_msg "Stopping Apache Hadoop Secondary Name Node server" "hadoop-secondarynamenode" + if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-secondarynamenode.pid; then + log_end_msg 0 + else + log_end_msg 1 + fi + ;; + restart) + check_privsep_dir + log_daemon_msg "Restarting Apache Hadoop Secondary Name Node server" "hadoop-secondarynamenode" + start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-secondarynamenode.pid + check_for_no_start log_end_msg + if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-secondarynamenode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start secondarynamenode; then + log_end_msg 0 + else + log_end_msg 1 + fi + ;; + + try-restart) + check_privsep_dir + log_daemon_msg "Restarting Apache Hadoop Secondary Name Node server" "hadoop-secondarynamenode" + set +e + start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-secondarynamenode.pid + RET="$?" + set -e + case $RET in + 0) + # old daemon stopped + check_for_no_start log_end_msg + if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-secondarynamenode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start secondarynamenode; then + log_end_msg 0 + else + log_end_msg 1 + fi + ;; + 1) + # daemon not running + log_progress_msg "(not running)" + log_end_msg 0 + ;; + *) + # failed to stop + log_progress_msg "(failed to stop)" + log_end_msg 1 + ;; + esac + ;; + + status) + status_of_proc -p ${HADOOP_PID_DIR}/hadoop-hdfs-secondarynamenode.pid ${JAVA_HOME}/bin/java hadoop-secondarynamenode && exit 0 || exit $? + ;; + + *) + log_action_msg "Usage: /etc/init.d/hadoop-secondarynamenode {start|stop|restart|try-restart|status}" + exit 1 +esac + +exit 0 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/rpm/init.d/hadoop-secondarynamenode b/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/rpm/init.d/hadoop-secondarynamenode new file mode 100644 index 0000000000..81fb7445cd --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/rpm/init.d/hadoop-secondarynamenode @@ -0,0 +1,92 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Starts a Hadoop secondarynamenode +# +# chkconfig: 2345 90 10 +# description: Hadoop secondarynamenode + +source /etc/rc.d/init.d/functions +source /etc/default/hadoop-env.sh + +RETVAL=0 +PIDFILE="${HADOOP_PID_DIR}/hadoop-hdfs-secondarynamenode.pid" +desc="Hadoop secondary namenode daemon" +export HADOOP_PREFIX="/usr" + +start() { + echo -n $"Starting $desc (hadoop-secondarynamenode): " + daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start secondarynamenode $1 + RETVAL=$? + echo + [ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-secondarynamenode + return $RETVAL +} + +upgrade() { + start -upgrade +} + +stop() { + echo -n $"Stopping $desc (hadoop-secondarynamenode): " + daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop secondarynamenode + RETVAL=$? + sleep 5 + echo + [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-secondarynamenode $PIDFILE +} + +checkstatus(){ + status -p $PIDFILE ${JAVA_HOME}/bin/java + RETVAL=$? +} + +restart() { + stop + start +} + +condrestart(){ + [ -e /var/lock/subsys/hadoop-secondarynamenode ] && restart || : +} + +case "$1" in + start) + start + ;; + upgrade) + upgrade + ;; + stop) + stop + ;; + status) + checkstatus + ;; + restart) + restart + ;; + condrestart|try-restart) + condrestart + ;; + *) + echo $"Usage: $0 {start|stop|status|restart|try-restart|upgrade}" + exit 1 +esac + +exit $RETVAL diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/rpm/spec/hadoop-hdfs.spec b/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/rpm/spec/hadoop-hdfs.spec index d2dcf4f2d8..edc4a3b8ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/rpm/spec/hadoop-hdfs.spec +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/packages/rpm/spec/hadoop-hdfs.spec @@ -144,8 +144,8 @@ mv ${RPM_BUILD_DIR}/%{_final_name}/share/* ${RPM_BUILD_DIR}%{_share_dir} rm -rf ${RPM_BUILD_DIR}/%{_final_name}/etc %pre -getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -r hadoop -/usr/sbin/useradd --comment "Hadoop HDFS" --shell /bin/bash -M -r --groups hadoop --home %{_var_dir}/hdfs hdfs 2> /dev/null || : +getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -g 123 -r hadoop +/usr/sbin/useradd --comment "Hadoop HDFS" -u 201 --shell /bin/bash -M -r --groups hadoop --home %{_var_dir}/hdfs hdfs 2> /dev/null || : %post bash ${RPM_INSTALL_PREFIX0}/sbin/update-hdfs-env.sh \ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index 5184ceb782..0ea9f01786 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -26,6 +26,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; +import java.security.PrivilegedExceptionAction; import java.util.Random; import org.apache.commons.logging.impl.Log4JLogger; @@ -37,6 
+38,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.security.UserGroupInformation; import org.apache.log4j.Level; import org.junit.Test; @@ -399,15 +401,40 @@ public void testFileChecksum() throws Exception { RAN.setSeed(seed); final Configuration conf = getTestConfiguration(); + conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); conf.set(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY, "localhost"); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); final FileSystem hdfs = cluster.getFileSystem(); - final String hftpuri = "hftp://" + conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY); - System.out.println("hftpuri=" + hftpuri); - final FileSystem hftp = new Path(hftpuri).getFileSystem(conf); - final String dir = "/filechecksum"; + final String nnAddr = conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY); + final UserGroupInformation current = UserGroupInformation.getCurrentUser(); + final UserGroupInformation ugi = UserGroupInformation.createUserForTesting( + current.getShortUserName() + "x", new String[]{"user"}); + + //hftp + final String hftpuri = "hftp://" + nnAddr; + System.out.println("hftpuri=" + hftpuri); + final FileSystem hftp = ugi.doAs( + new PrivilegedExceptionAction() { + @Override + public FileSystem run() throws Exception { + return new Path(hftpuri).getFileSystem(conf); + } + }); + + //webhdfs + final String webhdfsuri = WebHdfsFileSystem.SCHEME + "://" + nnAddr; + System.out.println("webhdfsuri=" + webhdfsuri); + final FileSystem webhdfs = ugi.doAs( + new PrivilegedExceptionAction() { + @Override + public FileSystem run() throws Exception { + return new Path(webhdfsuri).getFileSystem(conf); + } + }); + + final Path dir = new Path("/filechecksum"); final int block_size = 1024; final int buffer_size = conf.getInt("io.file.buffer.size", 4096); conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 512); @@ -431,7 +458,8 @@ public void testFileChecksum() throws Exception { //compute checksum final FileChecksum hdfsfoocs = hdfs.getFileChecksum(foo); System.out.println("hdfsfoocs=" + hdfsfoocs); - + + //hftp final FileChecksum hftpfoocs = hftp.getFileChecksum(foo); System.out.println("hftpfoocs=" + hftpfoocs); @@ -439,6 +467,14 @@ public void testFileChecksum() throws Exception { final FileChecksum qfoocs = hftp.getFileChecksum(qualified); System.out.println("qfoocs=" + qfoocs); + //webhdfs + final FileChecksum webhdfsfoocs = webhdfs.getFileChecksum(foo); + System.out.println("webhdfsfoocs=" + webhdfsfoocs); + + final Path webhdfsqualified = new Path(webhdfsuri + dir, "foo" + n); + final FileChecksum webhdfs_qfoocs = webhdfs.getFileChecksum(webhdfsqualified); + System.out.println("webhdfs_qfoocs=" + webhdfs_qfoocs); + //write another file final Path bar = new Path(dir, "bar" + n); { @@ -454,24 +490,40 @@ public void testFileChecksum() throws Exception { assertEquals(hdfsfoocs.hashCode(), barhashcode); assertEquals(hdfsfoocs, barcs); + //hftp assertEquals(hftpfoocs.hashCode(), barhashcode); assertEquals(hftpfoocs, barcs); assertEquals(qfoocs.hashCode(), barhashcode); assertEquals(qfoocs, barcs); + + //webhdfs + assertEquals(webhdfsfoocs.hashCode(), barhashcode); + assertEquals(webhdfsfoocs, barcs); + + assertEquals(webhdfs_qfoocs.hashCode(), barhashcode); + assertEquals(webhdfs_qfoocs, barcs); } + hdfs.setPermission(dir, new FsPermission((short)0)); { //test permission 
error on hftp - hdfs.setPermission(new Path(dir), new FsPermission((short)0)); try { - final String username = UserGroupInformation.getCurrentUser().getShortUserName() + "1"; - final HftpFileSystem hftp2 = cluster.getHftpFileSystemAs(username, conf, 0, "somegroup"); - hftp2.getFileChecksum(qualified); + hftp.getFileChecksum(qualified); fail(); } catch(IOException ioe) { FileSystem.LOG.info("GOOD: getting an exception", ioe); } } + + { //test permission error on webhdfs + try { + webhdfs.getFileChecksum(webhdfsqualified); + fail(); + } catch(IOException ioe) { + FileSystem.LOG.info("GOOD: getting an exception", ioe); + } + } + hdfs.setPermission(dir, new FsPermission((short)0777)); } cluster.shutdown(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java index d7ee516b0a..9d5afc29c4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.hdfs.tools.DFSAdmin; +import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; import org.junit.Test; @@ -770,6 +771,11 @@ public void testSpaceCommands() throws Exception { } } + private static void checkContentSummary(final ContentSummary expected, + final ContentSummary computed) { + assertEquals(expected.toString(), computed.toString()); + } + /** * Violate a space quota using files of size < 1 block. 
Test that block * allocation conservatively assumes that for quota checking the entire @@ -781,12 +787,18 @@ public void testBlockAllocationAdjustsUsageConservatively() Configuration conf = new HdfsConfiguration(); final int BLOCK_SIZE = 6 * 1024; conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); cluster.waitActive(); FileSystem fs = cluster.getFileSystem(); DFSAdmin admin = new DFSAdmin(conf); - + + final String nnAddr = conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY); + final String webhdfsuri = WebHdfsFileSystem.SCHEME + "://" + nnAddr; + System.out.println("webhdfsuri=" + webhdfsuri); + final FileSystem webhdfs = new Path(webhdfsuri).getFileSystem(conf); + try { Path dir = new Path("/test"); Path file1 = new Path("/test/test1"); @@ -806,6 +818,7 @@ public void testBlockAllocationAdjustsUsageConservatively() DFSTestUtil.createFile(fs, file1, FILE_SIZE, (short) 3, 1L); DFSTestUtil.waitReplication(fs, file1, (short) 3); c = fs.getContentSummary(dir); + checkContentSummary(c, webhdfs.getContentSummary(dir)); assertEquals("Quota is half consumed", QUOTA_SIZE / 2, c.getSpaceConsumed()); @@ -836,12 +849,18 @@ public void testMultipleFilesSmallerThanOneBlock() throws Exception { Configuration conf = new HdfsConfiguration(); final int BLOCK_SIZE = 6 * 1024; conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); cluster.waitActive(); FileSystem fs = cluster.getFileSystem(); DFSAdmin admin = new DFSAdmin(conf); + final String nnAddr = conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY); + final String webhdfsuri = WebHdfsFileSystem.SCHEME + "://" + nnAddr; + System.out.println("webhdfsuri=" + webhdfsuri); + final FileSystem webhdfs = new Path(webhdfsuri).getFileSystem(conf); + try { //Test for deafult NameSpace Quota @@ -882,6 +901,7 @@ public void testMultipleFilesSmallerThanOneBlock() throws Exception { // Should account for all 59 files (almost QUOTA_SIZE) c = fs.getContentSummary(dir); + checkContentSummary(c, webhdfs.getContentSummary(dir)); assertEquals("Invalid space consumed", 59 * FILE_SIZE * 3, c.getSpaceConsumed()); assertEquals("Invalid space consumed", QUOTA_SIZE - (59 * FILE_SIZE * 3), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogJournalFailures.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogJournalFailures.java index 81133242a0..8711898692 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogJournalFailures.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogJournalFailures.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.any; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.spy; @@ -33,6 +34,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; import org.junit.After; import 
org.junit.Before; import org.junit.Test; @@ -73,7 +75,7 @@ public void shutDownMiniCluster() throws IOException { public void testSingleFailedEditsDirOnFlush() throws IOException { assertTrue(doAnEdit()); // Invalidate one edits journal. - invalidateEditsDirAtIndex(0, true); + invalidateEditsDirAtIndex(0, true, false); // Make sure runtime.exit(...) hasn't been called at all yet. assertExitInvocations(0); assertTrue(doAnEdit()); @@ -86,8 +88,22 @@ public void testSingleFailedEditsDirOnFlush() throws IOException { public void testAllEditsDirsFailOnFlush() throws IOException { assertTrue(doAnEdit()); // Invalidate both edits journals. - invalidateEditsDirAtIndex(0, true); - invalidateEditsDirAtIndex(1, true); + invalidateEditsDirAtIndex(0, true, false); + invalidateEditsDirAtIndex(1, true, false); + // Make sure runtime.exit(...) hasn't been called at all yet. + assertExitInvocations(0); + assertTrue(doAnEdit()); + // The previous edit could not be synced to any persistent storage, should + // have halted the NN. + assertExitInvocations(1); + } + + @Test + public void testAllEditsDirFailOnWrite() throws IOException { + assertTrue(doAnEdit()); + // Invalidate both edits journals. + invalidateEditsDirAtIndex(0, true, true); + invalidateEditsDirAtIndex(1, true, true); // Make sure runtime.exit(...) hasn't been called at all yet. assertExitInvocations(0); assertTrue(doAnEdit()); @@ -100,7 +116,7 @@ public void testAllEditsDirsFailOnFlush() throws IOException { public void testSingleFailedEditsDirOnSetReadyToFlush() throws IOException { assertTrue(doAnEdit()); // Invalidate one edits journal. - invalidateEditsDirAtIndex(0, false); + invalidateEditsDirAtIndex(0, false, false); // Make sure runtime.exit(...) hasn't been called at all yet. assertExitInvocations(0); assertTrue(doAnEdit()); @@ -117,16 +133,18 @@ public void testSingleFailedEditsDirOnSetReadyToFlush() throws IOException { * @return the original EditLogOutputStream of the journal. 
*/ private EditLogOutputStream invalidateEditsDirAtIndex(int index, - boolean failOnFlush) throws IOException { + boolean failOnFlush, boolean failOnWrite) throws IOException { FSImage fsimage = cluster.getNamesystem().getFSImage(); FSEditLog editLog = fsimage.getEditLog(); - - FSEditLog.JournalAndStream jas = editLog.getJournals().get(index); + JournalAndStream jas = editLog.getJournals().get(index); EditLogFileOutputStream elos = (EditLogFileOutputStream) jas.getCurrentStream(); EditLogFileOutputStream spyElos = spy(elos); - + if (failOnWrite) { + doThrow(new IOException("fail on write()")).when(spyElos).write( + (FSEditLogOp) any()); + } if (failOnFlush) { doThrow(new IOException("fail on flush()")).when(spyElos).flush(); } else { @@ -151,7 +169,7 @@ private void restoreEditsDirAtIndex(int index, EditLogOutputStream elos) { FSImage fsimage = cluster.getNamesystem().getFSImage(); FSEditLog editLog = fsimage.getEditLog(); - FSEditLog.JournalAndStream jas = editLog.getJournals().get(index); + JournalAndStream jas = editLog.getJournals().get(index); jas.setCurrentStreamForTests(elos); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java index bc5aa162fb..d3d64594ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; +import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.log4j.Level; @@ -356,7 +357,7 @@ public void testSaveImageWhileSyncInProgress() throws Exception { FSImage fsimage = namesystem.getFSImage(); FSEditLog editLog = fsimage.getEditLog(); - FSEditLog.JournalAndStream jas = editLog.getJournals().get(0); + JournalAndStream jas = editLog.getJournals().get(0); EditLogFileOutputStream spyElos = spy((EditLogFileOutputStream)jas.getCurrentStream()); jas.setCurrentStreamForTests(spyElos); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java index 11152883c8..f3a4638f10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java @@ -28,7 +28,6 @@ import java.util.Iterator; import java.util.Set; -import static org.mockito.Matchers.anyByte; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.spy; @@ -45,7 +44,7 @@ import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; -import org.apache.hadoop.io.Writable; +import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; import static org.apache.hadoop.hdfs.server.namenode.NNStorage.getInProgressEditsFileName; import static 
org.apache.hadoop.hdfs.server.namenode.NNStorage.getFinalizedEditsFileName; @@ -123,7 +122,7 @@ public void invalidateStorage(FSImage fi, Set filesToInvalidate) throws IO // simulate an error fi.getStorage().reportErrorsOnDirectories(al); - for (FSEditLog.JournalAndStream j : fi.getEditLog().getJournals()) { + for (JournalAndStream j : fi.getEditLog().getJournals()) { if (j.getManager() instanceof FileJournalManager) { FileJournalManager fm = (FileJournalManager)j.getManager(); if (fm.getStorageDirectory().getRoot().equals(path2) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java index a2000144f2..a4b687d5e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java @@ -17,14 +17,16 @@ */ package org.apache.hadoop.hdfs.web; +import java.util.Map; + import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; -import org.apache.hadoop.hdfs.web.JsonUtil; import org.junit.Assert; import org.junit.Test; +import org.mortbay.util.ajax.JSON; public class TestJsonUtil { static FileStatus toFileStatus(HdfsFileStatus f, String parent) { @@ -46,7 +48,7 @@ public void testHdfsFileStatus() { System.out.println("fstatus = " + fstatus); final String json = JsonUtil.toJsonString(status); System.out.println("json = " + json.replace(",", ",\n ")); - final HdfsFileStatus s2 = JsonUtil.toFileStatus(JsonUtil.parse(json)); + final HdfsFileStatus s2 = JsonUtil.toFileStatus((Map)JSON.parse(json)); final FileStatus fs2 = toFileStatus(s2, parent); System.out.println("s2 = " + s2); System.out.println("fs2 = " + fs2); diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 43fed6baa1..bb7242f29c 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -22,6 +22,9 @@ Trunk (unreleased changes) BUG FIXES + MAPREDUCE-2950. [Gridmix] TestUserResolve fails in trunk. + (Ravi Gummadi via amarrk) + MAPREDUCE-2784. [Gridmix] Bug fixes in ExecutionSummarizer and ResourceUsageMatcher. (amarrk) @@ -75,6 +78,9 @@ Release 0.23.0 - Unreleased MAPREDUCE-2930. Added the ability to be able to generate graphs from the state-machine definitions. (Binglin Chang via vinodkv) + MAPREDUCE-2719. Add a simple, DistributedShell, application to illustrate + alternate frameworks on YARN. (Hitesh Shah via acmurthy) + IMPROVEMENTS MAPREDUCE-2187. Reporter sends progress during sort/merge. (Anupam Seth via @@ -321,6 +327,33 @@ Release 0.23.0 - Unreleased MAPREDUCE-3099. Add docs for setting up a single node MRv2 cluster. (mahadev) + MAPREDUCE-3001. Added task-specific counters to AppMaster and JobHistory + web-UIs. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-3098. Fixed RM and MR AM to report YarnApplicationState and + application's FinalStatus separately. (Hitesh Shah via vinodkv) + + MAPREDUCE-2889. Added documentation for writing new YARN applications. + (Hitesh Shah via acmurthy) + + MAPREDUCE-3134. Added documentation for the CapacityScheduler. (acmurthy) + + MAPREDUCE-3138. Add a utility to help applications bridge changes in + Context Objects APIs due to MAPREDUCE-954. (omalley via acmurthy) + + MAPREDUCE-3013.
Removed YarnConfiguration.YARN_SECURITY_INFO and its usage + as it doesn't affect security any more. (vinodkv) + + MAPREDUCE-2907. Changed log level for various messages in ResourceManager + from INFO to DEBUG. (Ravi Prakash via vinodkv) + + MAPREDUCE-2702. Added a new API in OutputCommitter for recovering + the outputs of tasks from a crashed job so as to support MR Application + Master recovery. (Sharad Agarwal and Arun C Murthy via vinodkv) + + MAPREDUCE-2738. Added the missing cluster level statistics on the RM web + UI. (Robert Joseph Evans via vinodkv) + OPTIMIZATIONS MAPREDUCE-2026. Make JobTracker.getJobCounters() and @@ -343,9 +376,6 @@ Release 0.23.0 - Unreleased MAPREDUCE-2539. Fixed NPE in getMapTaskReports in JobClient. (Robert Evans via acmurthy) - MAPREDUCE-2531. Fixed jobcontrol to downgrade JobID. (Robert Evans via - acmurthy) - MAPREDUCE-1978. Rumen TraceBuilder should provide recursive input folder scanning. @@ -1472,6 +1502,33 @@ Release 0.23.0 - Unreleased MAPREDUCE-3078. Ensure MapReduce AM reports progress correctly for displaying on the RM Web-UI. (vinodkv via acmurthy) + MAPREDUCE-3114. Fixed invalid ApplicationURL on RM WebUI. (Subroto Sanyal + via vinodkv) + + MAPREDUCE-2791. Added missing info on 'job -status' output. (Devaraj K via + acmurthy) + + MAPREDUCE-2996. Add uber-ness information to JobHistory. (Jonathan Eagles + via acmurthy) + + MAPREDUCE-3050. Add ability to get resource usage information for + applications and nodes. (Robert Evans via acmurthy) + + MAPREDUCE-3113. Ensure bin/yarn and bin/yarn-daemon.sh identify the root + of the install properly. (Xie Xianshan via acmurthy) + + MAPREDUCE-2792. Replace usage of node ip-addresses with hostnames. + (vinodkv via acmurthy) + + MAPREDUCE-3112. Fixed recursive sourcing of HADOOP_OPTS environment + variable. (Eric Yang) + + MAPREDUCE-3056. Changed the default staging directory to not include + user.name to prevent issues with non-secure mode. (Devaraj K via vinodkv) + + MAPREDUCE-2913. Fixed TestMRJobs.testFailingMapper to assert the correct + TaskCompletionEventStatus. (Jonathan Eagles via vinodkv) + Release 0.22.0 - Unreleased INCOMPATIBLE CHANGES @@ -2057,6 +2114,12 @@ Release 0.22.0 - Unreleased MAPREDUCE-2991. queueinfo.jsp fails to show queue status for Capacity scheduler if queue names contain special symbols. (Priyo Mustafi via shv) + MAPREDUCE-2779. JobSplitWriter.java can't handle large job.split file. + (Ming Ma via shv) + + MAPREDUCE-2531. Fixed jobcontrol to downgrade JobID.
(Robert Evans via + acmurthy) + Release 0.21.1 - Unreleased NEW FEATURES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 8b7d578fc9..be9a377611 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; +import java.security.PrivilegedExceptionAction; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -84,7 +85,6 @@ import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.service.AbstractService; import org.apache.hadoop.yarn.service.CompositeService; import org.apache.hadoop.yarn.service.Service; @@ -657,14 +657,29 @@ public static void main(String[] args) { new CompositeServiceShutdownHook(appMaster)); YarnConfiguration conf = new YarnConfiguration(new JobConf()); conf.addResource(new Path(MRJobConfig.JOB_CONF_FILE)); - conf.set(MRJobConfig.USER_NAME, - System.getProperty("user.name")); - UserGroupInformation.setConfiguration(conf); - appMaster.init(conf); - appMaster.start(); + String jobUserName = System + .getenv(ApplicationConstants.Environment.USER.name()); + conf.set(MRJobConfig.USER_NAME, jobUserName); + initAndStartAppMaster(appMaster, conf, jobUserName); } catch (Throwable t) { LOG.fatal("Error starting MRAppMaster", t); System.exit(1); } - } + } + + protected static void initAndStartAppMaster(final MRAppMaster appMaster, + final YarnConfiguration conf, String jobUserName) throws IOException, + InterruptedException { + UserGroupInformation.setConfiguration(conf); + UserGroupInformation appMasterUgi = UserGroupInformation + .createRemoteUser(jobUserName); + appMasterUgi.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + appMaster.init(conf); + appMaster.start(); + return null; + } + }); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java index f84a4d9dbe..faf11a117c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java @@ -114,7 +114,7 @@ public MRClientService(AppContext appContext) { } public void start() { - Configuration conf = new Configuration(getConfig()); // Just for not messing up sec-info class config + Configuration conf = getConfig(); YarnRPC rpc = YarnRPC.create(conf); InetSocketAddress address = 
NetUtils.createSocketAddr("0.0.0.0:0"); InetAddress hostNameResolved = null; @@ -134,9 +134,6 @@ public void start() { ApplicationTokenIdentifier identifier = new ApplicationTokenIdentifier(this.appContext.getApplicationID()); secretManager.setMasterKey(identifier, bytes); - conf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - SchedulerSecurityInfo.class, SecurityInfo.class); // Same for now. } server = rpc.getServer(MRClientProtocol.class, protocolHandler, address, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index c26bc24695..4a47462a2d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -590,12 +590,12 @@ public JobReport getReport() { if (getState() == JobState.NEW) { return MRBuilderUtils.newJobReport(jobId, jobName, username, state, startTime, finishTime, setupProgress, 0.0f, - 0.0f, cleanupProgress); + 0.0f, cleanupProgress, remoteJobConfFile.toString()); } return MRBuilderUtils.newJobReport(jobId, jobName, username, state, startTime, finishTime, setupProgress, computeProgress(mapTasks), - computeProgress(reduceTasks), cleanupProgress); + computeProgress(reduceTasks), cleanupProgress, remoteJobConfFile.toString()); } finally { readLock.unlock(); } @@ -991,10 +991,9 @@ protected void setup(JobImpl job) throws IOException { String user = UserGroupInformation.getCurrentUser().getShortUserName(); Path path = MRApps.getStagingAreaDir(job.conf, user); - LOG.info("DEBUG --- startJobs:" - + " parent=" - + path + " child=" - + oldJobIDString); + if(LOG.isDebugEnabled()) { + LOG.debug("startJobs: parent=" + path + " child=" + oldJobIDString); + } job.remoteJobSubmitDir = FileSystem.get(job.conf).makeQualified( @@ -1135,7 +1134,8 @@ public void transition(JobImpl job, JobEvent event) { new JobInitedEvent(job.oldJobId, job.startTime, job.numMapTasks, job.numReduceTasks, - job.getState().toString()); //Will transition to state running. Currently in INITED + job.getState().toString(), + job.isUber()); //Will transition to state running. 
Currently in INITED job.eventHandler.handle(new JobHistoryEvent(job.jobId, jie)); JobInfoChangeEvent jice = new JobInfoChangeEvent(job.oldJobId, job.submitTime, job.startTime); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java index 95e17d8f4f..6e7996b6da 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java @@ -91,17 +91,11 @@ public ContainerLauncherImpl(AppContext context) { @Override public synchronized void init(Configuration conf) { - // Clone configuration for this component so that the SecurityInfo setting - // doesn't affect the original configuration - Configuration myLocalConfig = new Configuration(conf); - myLocalConfig.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - ContainerManagerSecurityInfo.class, SecurityInfo.class); this.recordFactory = RecordFactoryProvider.getRecordFactory(conf); this.limitOnPoolSize = conf.getInt( MRJobConfig.MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT, MRJobConfig.DEFAULT_MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT); - super.init(myLocalConfig); + super.init(conf); } public void start() { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java index 15a7e3f6a5..91d60c2081 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java @@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; @@ -75,7 +76,7 @@ public abstract class RMCommunicator extends AbstractService { private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); - + private final AppContext context; private Job job; @@ -146,8 +147,8 @@ protected float getApplicationProgress() { protected void register() { //Register - String host = - clientService.getBindAddress().getAddress().getHostAddress(); + String host = clientService.getBindAddress().getAddress() + .getCanonicalHostName(); try { RegisterApplicationMasterRequest request = recordFactory.newRecordInstance(RegisterApplicationMasterRequest.class); @@ -155,7 +156,7 @@ protected void register() { request.setHost(host); request.setRpcPort(clientService.getBindAddress().getPort()); request.setTrackingUrl(host + ":" + clientService.getHttpPort()); - 
RegisterApplicationMasterResponse response = + RegisterApplicationMasterResponse response = scheduler.registerApplicationMaster(request); minContainerCapability = response.getMinimumResourceCapability(); maxContainerCapability = response.getMaximumResourceCapability(); @@ -169,29 +170,29 @@ protected void register() { protected void unregister() { try { - String finalState = "RUNNING"; + FinalApplicationStatus finishState = FinalApplicationStatus.UNDEFINED; if (job.getState() == JobState.SUCCEEDED) { - finalState = "SUCCEEDED"; + finishState = FinalApplicationStatus.SUCCEEDED; } else if (job.getState() == JobState.KILLED) { - finalState = "KILLED"; + finishState = FinalApplicationStatus.KILLED; } else if (job.getState() == JobState.FAILED || job.getState() == JobState.ERROR) { - finalState = "FAILED"; + finishState = FinalApplicationStatus.FAILED; } StringBuffer sb = new StringBuffer(); for (String s : job.getDiagnostics()) { sb.append(s).append("\n"); } LOG.info("Setting job diagnostics to " + sb.toString()); - - String historyUrl = JobHistoryUtils.getHistoryUrl(getConfig(), + + String historyUrl = JobHistoryUtils.getHistoryUrl(getConfig(), context.getApplicationID()); LOG.info("History url is " + historyUrl); FinishApplicationMasterRequest request = recordFactory.newRecordInstance(FinishApplicationMasterRequest.class); request.setAppAttemptId(this.applicationAttemptId); - request.setFinalState(finalState.toString()); + request.setFinishApplicationStatus(finishState); request.setDiagnostics(sb.toString()); request.setTrackingUrl(historyUrl); scheduler.finishApplicationMaster(request); @@ -203,7 +204,7 @@ protected void unregister() { protected Resource getMinContainerCapability() { return minContainerCapability; } - + protected Resource getMaxContainerCapability() { return maxContainerCapability; } @@ -246,7 +247,7 @@ public void run() { protected AMRMProtocol createSchedulerProxy() { final YarnRPC rpc = YarnRPC.create(getConfig()); - final Configuration conf = new Configuration(getConfig()); + final Configuration conf = getConfig(); final String serviceAddr = conf.get( YarnConfiguration.RM_SCHEDULER_ADDRESS, YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS); @@ -259,9 +260,6 @@ protected AMRMProtocol createSchedulerProxy() { } if (UserGroupInformation.isSecurityEnabled()) { - conf.setClass(YarnConfiguration.YARN_SECURITY_INFO, - SchedulerSecurityInfo.class, SecurityInfo.class); - String tokenURLEncodedStr = System.getenv().get( ApplicationConstants.APPLICATION_MASTER_TOKEN_ENV_NAME); LOG.debug("AppMasterToken is " + tokenURLEncodedStr); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMParams.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMParams.java index 7dfdefad7d..ad8a15d45a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMParams.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMParams.java @@ -28,4 +28,6 @@ public interface AMParams { static final String TASK_ID = "task.id"; static final String TASK_TYPE = "task.type"; static final String ATTEMPT_STATE = "attempt.state"; + static final String COUNTER_GROUP = "counter.group"; + static final String COUNTER_NAME = "counter.name"; } diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebApp.java index a018c45be5..55601180a3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebApp.java @@ -34,9 +34,14 @@ public void setup() { route(pajoin("/job", JOB_ID), AppController.class, "job"); route(pajoin("/conf", JOB_ID), AppController.class, "conf"); route(pajoin("/jobcounters", JOB_ID), AppController.class, "jobCounters"); + route(pajoin("/singlejobcounter",JOB_ID, COUNTER_GROUP, COUNTER_NAME), + AppController.class, "singleJobCounter"); route(pajoin("/tasks", JOB_ID, TASK_TYPE), AppController.class, "tasks"); route(pajoin("/attempts", JOB_ID, TASK_TYPE, ATTEMPT_STATE), AppController.class, "attempts"); route(pajoin("/task", TASK_ID), AppController.class, "task"); + route(pajoin("/taskcounters", TASK_ID), AppController.class, "taskCounters"); + route(pajoin("/singletaskcounter",TASK_ID, COUNTER_GROUP, COUNTER_NAME), + AppController.class, "singleTaskCounter"); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java index bc4564faae..eff721d17f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java @@ -20,6 +20,8 @@ import static org.apache.hadoop.yarn.util.StringHelper.join; +import java.io.IOException; +import java.net.URLDecoder; import java.util.Locale; import javax.servlet.http.HttpServletResponse; @@ -30,7 +32,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.util.Apps; +import org.apache.hadoop.yarn.util.StringHelper; import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.webapp.Controller; import org.apache.hadoop.yarn.webapp.View; @@ -41,7 +43,7 @@ * This class renders the various pages that the web app supports. */ public class AppController extends Controller implements AMParams { - final App app; + protected final App app; protected AppController(App app, Configuration conf, RequestContext ctx, String title) { @@ -109,6 +111,54 @@ public void jobCounters() { } render(countersPage()); } + + /** + * Display a page showing a task's counters + */ + public void taskCounters() { + requireTask(); + if (app.getTask() != null) { + setTitle(StringHelper.join("Counters for ", $(TASK_ID))); + } + render(countersPage()); + } + + /** + * @return the class that will render the /singlejobcounter page + */ + protected Class singleCounterPage() { + return SingleCounterPage.class; + } + + /** + * Render the /singlejobcounter page + * @throws IOException on any error. 
+ */ + public void singleJobCounter() throws IOException{ + requireJob(); + set(COUNTER_GROUP, URLDecoder.decode($(COUNTER_GROUP), "UTF-8")); + set(COUNTER_NAME, URLDecoder.decode($(COUNTER_NAME), "UTF-8")); + if (app.getJob() != null) { + setTitle(StringHelper.join($(COUNTER_GROUP)," ",$(COUNTER_NAME), + " for ", $(JOB_ID))); + } + render(singleCounterPage()); + } + + /** + * Render the /singletaskcounter page + * @throws IOException on any error. + */ + public void singleTaskCounter() throws IOException{ + requireTask(); + set(COUNTER_GROUP, URLDecoder.decode($(COUNTER_GROUP), "UTF-8")); + set(COUNTER_NAME, URLDecoder.decode($(COUNTER_NAME), "UTF-8")); + if (app.getTask() != null) { + setTitle(StringHelper.join($(COUNTER_GROUP)," ",$(COUNTER_NAME), + " for ", $(TASK_ID))); + } + render(singleCounterPage()); + } /** * @return the class that will render the /tasks page diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java index bd95599796..a23821ec4b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java @@ -61,6 +61,29 @@ public class CountersBlock extends HtmlBlock { p()._("Sorry, no counters for nonexistent", $(TASK_ID, "task"))._(); return; } + + if(total == null || total.getAllCounterGroups() == null || + total.getAllCounterGroups().size() <= 0) { + String type = $(TASK_ID); + if(type == null || type.isEmpty()) { + type = $(JOB_ID, "the job"); + } + html. + p()._("Sorry it looks like ",type," has no counters.")._(); + return; + } + + String urlBase; + String urlId; + if(task != null) { + urlBase = "singletaskcounter"; + urlId = MRApps.toString(task.getID()); + } else { + urlBase = "singlejobcounter"; + urlId = MRApps.toString(job.getID()); + } + + int numGroups = 0; TBODY>> tbody = html. div(_INFO_WRAP). @@ -79,12 +102,13 @@ public class CountersBlock extends HtmlBlock { // serves as an indicator of where we're in the tag hierarchy. TR>>>>>>> groupHeadRow = tbody. tr(). - th().$title(g.getName()). + th().$title(g.getName()).$class("ui-state-default"). _(fixGroupDisplayName(g.getDisplayName()))._(). td().$class(C_TABLE). table(".dt-counters"). thead(). tr().th(".name", "Name"); + if (map != null) { groupHeadRow.th("Map").th("Reduce"); } @@ -97,7 +121,9 @@ public class CountersBlock extends HtmlBlock { TR>>>>>>> groupRow = group. tr(). td().$title(counter.getName()). - _(counter.getDisplayName())._(); + a(url(urlBase,urlId,g.getName(), + counter.getName()), counter.getDisplayName()). + _(); if (map != null) { Counter mc = mg == null ? null : mg.getCounter(counter.getName()); Counter rc = rg == null ? 
null : rg.getCounter(counter.getName()); @@ -121,7 +147,7 @@ private void getCounters(AppContext ctx) { jobID = taskID.getJobId(); } else { String jid = $(JOB_ID); - if (!jid.isEmpty()) { + if (jid != null && !jid.isEmpty()) { jobID = MRApps.toJobID(jid); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersPage.java index 9bd5ed1999..da5a34b945 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersPage.java @@ -20,13 +20,19 @@ import org.apache.hadoop.yarn.webapp.SubView; +import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.TASK_ID; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; public class CountersPage extends AppView { @Override protected void preHead(Page.HTML<_> html) { commonPreHead(html); - set(initID(ACCORDION, "nav"), "{autoHeight:false, active:2}"); + String tid = $(TASK_ID); + String activeNav = "3"; + if(tid == null || tid.isEmpty()) { + activeNav = "2"; + } + set(initID(ACCORDION, "nav"), "{autoHeight:false, active:"+activeNav+"}"); set(DATATABLES_SELECTOR, "#counters .dt-counters"); set(initSelector(DATATABLES), "{bJQueryUI:true, sDom:'t', iDisplayLength:-1}"); @@ -35,9 +41,9 @@ public class CountersPage extends AppView { @Override protected void postHead(Page.HTML<_> html) { html. style("#counters, .dt-counters { table-layout: fixed }", - "#counters th { overflow: hidden; vertical-align: center }", + "#counters th { overflow: hidden; vertical-align: middle }", "#counters .dataTables_wrapper { min-height: 1em }", - "#counters .group { width: 10em }", + "#counters .group { width: 15em }", "#counters .name { width: 30em }"); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java index 8b4524ad11..de56f5a222 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java @@ -55,6 +55,14 @@ public class NavBlock extends HtmlBlock { li().a(url("conf", jobid), "Configuration")._(). li().a(url("tasks", jobid, "m"), "Map tasks")._(). li().a(url("tasks", jobid, "r"), "Reduce tasks")._()._(); + if (app.getTask() != null) { + String taskid = MRApps.toString(app.getTask().getID()); + nav. + h3("Task"). + ul(). + li().a(url("task", taskid), "Task Overview")._(). + li().a(url("taskcounters", taskid), "Counters")._()._(); + } } nav. h3("Tools"). 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java new file mode 100644 index 0000000000..1ec774e3fb --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java @@ -0,0 +1,151 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.mapreduce.v2.app.webapp; + +import com.google.inject.Inject; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.hadoop.mapreduce.v2.api.records.Counter; +import org.apache.hadoop.mapreduce.v2.api.records.CounterGroup; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; +import org.apache.hadoop.mapreduce.v2.app.AppContext; +import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.app.job.Task; +import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; +import org.apache.hadoop.mapreduce.v2.util.MRApps; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TR; +import org.apache.hadoop.yarn.webapp.view.HtmlBlock; + +import static org.apache.hadoop.mapreduce.v2.app.webapp.AMWebApp.*; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; + +public class SingleCounterBlock extends HtmlBlock { + protected TreeMap<String, Long> values = new TreeMap<String, Long>(); + protected Job job; + protected Task task; + + @Inject SingleCounterBlock(AppContext appCtx, ViewContext ctx) { + super(ctx); + this.populateMembers(appCtx); + } + + @Override protected void render(Block html) { + if (job == null) { + html. + p()._("Sorry, no counters for nonexistent", $(JOB_ID, "job"))._(); + return; + } + if (!$(TASK_ID).isEmpty() && task == null) { + html. + p()._("Sorry, no counters for nonexistent", $(TASK_ID, "task"))._(); + return; + } + + String columnType = task == null ? "Task" : "Task Attempt"; + + TBODY<TABLE<DIV<Hamlet>>> tbody = html. + div(_INFO_WRAP). + table("#singleCounter"). + thead(). + tr(). + th(".ui-state-default", columnType). + th(".ui-state-default", "Value")._()._().
+ tbody(); + for (Map.Entry<String, Long> entry : values.entrySet()) { + TR<TBODY<TABLE<DIV<Hamlet>>>> row = tbody.tr(); + String id = entry.getKey(); + String val = entry.getValue().toString(); + if(task != null) { + row.td(id); + row.td().br().$title(val)._()._(val)._(); + } else { + row.td().a(url("singletaskcounter",entry.getKey(), + $(COUNTER_GROUP), $(COUNTER_NAME)), id)._(); + row.td().br().$title(val)._().a(url("singletaskcounter",entry.getKey(), + $(COUNTER_GROUP), $(COUNTER_NAME)), val)._(); + } + row._(); + } + tbody._()._()._(); + } + + private void populateMembers(AppContext ctx) { + JobId jobID = null; + TaskId taskID = null; + String tid = $(TASK_ID); + if (!tid.isEmpty()) { + taskID = MRApps.toTaskID(tid); + jobID = taskID.getJobId(); + } else { + String jid = $(JOB_ID); + if (!jid.isEmpty()) { + jobID = MRApps.toJobID(jid); + } + } + if (jobID == null) { + return; + } + job = ctx.getJob(jobID); + if (job == null) { + return; + } + if (taskID != null) { + task = job.getTask(taskID); + if (task == null) { + return; + } + for(Map.Entry<TaskAttemptId, TaskAttempt> entry : + task.getAttempts().entrySet()) { + long value = 0; + CounterGroup group = entry.getValue().getCounters() + .getCounterGroup($(COUNTER_GROUP)); + if(group != null) { + Counter c = group.getCounter($(COUNTER_NAME)); + if(c != null) { + value = c.getValue(); + } + } + values.put(MRApps.toString(entry.getKey()), value); + } + + return; + } + // Get all types of counters + Map<TaskId, Task> tasks = job.getTasks(); + for(Map.Entry<TaskId, Task> entry : tasks.entrySet()) { + long value = 0; + CounterGroup group = entry.getValue().getCounters() + .getCounterGroup($(COUNTER_GROUP)); + if(group != null) { + Counter c = group.getCounter($(COUNTER_NAME)); + if(c != null) { + value = c.getValue(); + } + } + values.put(MRApps.toString(entry.getKey()), value); + } + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterPage.java new file mode 100644 index 0000000000..729b5a8c49 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterPage.java @@ -0,0 +1,69 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License.
+*/ + +package org.apache.hadoop.mapreduce.v2.app.webapp; + +import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.TASK_ID; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; + +import org.apache.hadoop.mapreduce.v2.app.webapp.SingleCounterBlock; +import org.apache.hadoop.yarn.webapp.SubView; + +/** + * Render the counters page + */ +public class SingleCounterPage extends AppView { + + /* + * (non-Javadoc) + * @see org.apache.hadoop.mapreduce.v2.hs.webapp.HsView#preHead(org.apache.hadoop.yarn.webapp.hamlet.Hamlet.HTML) + */ + @Override protected void preHead(Page.HTML<_> html) { + commonPreHead(html); + String tid = $(TASK_ID); + String activeNav = "3"; + if(tid == null || tid.isEmpty()) { + activeNav = "2"; + } + set(initID(ACCORDION, "nav"), "{autoHeight:false, active:"+activeNav+"}"); + set(DATATABLES_ID, "singleCounter"); + set(initID(DATATABLES, "singleCounter"), counterTableInit()); + setTableStyles(html, "singleCounter"); + } + + /** + * @return The end of a javascript map that is the jquery datatable + * configuration for the jobs table. the Jobs table is assumed to be + * rendered by the class returned from {@link #content()} + */ + private String counterTableInit() { + return tableInit(). + append(",aoColumnDefs:["). + append("{'sType':'title-numeric', 'aTargets': [ 1 ] }"). + append("]}"). + toString(); + } + + /** + * The content of this page is the CountersBlock now. + * @return CountersBlock.class + */ + @Override protected Class content() { + return SingleCounterBlock.class; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java index 736bef639e..9918f66c80 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java @@ -108,7 +108,7 @@ protected Collection getTaskAttempts() { @Override protected void preHead(Page.HTML<_> html) { commonPreHead(html); - set(initID(ACCORDION, "nav"), "{autoHeight:false, active:2}"); + set(initID(ACCORDION, "nav"), "{autoHeight:false, active:3}"); set(DATATABLES_ID, "attempts"); set(initID(DATATABLES, "attempts"), attemptsTableInit()); setTableStyles(html, "attempts"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRAppMaster.java new file mode 100644 index 0000000000..c21c4528fb --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRAppMaster.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.v2.app; + +import java.io.IOException; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.v2.util.MRApps; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.util.ConverterUtils; +import org.junit.Test; + +public class TestMRAppMaster { + @Test + public void testMRAppMasterForDifferentUser() throws IOException, + InterruptedException { + String applicationAttemptIdStr = "appattempt_1317529182569_0004_000001"; + String stagingDir = "/tmp/staging"; + String userName = "TestAppMasterUser"; + ApplicationAttemptId applicationAttemptId = ConverterUtils + .toApplicationAttemptId(applicationAttemptIdStr); + MRAppMasterTest appMaster = new MRAppMasterTest(applicationAttemptId); + YarnConfiguration conf = new YarnConfiguration(); + conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); + MRAppMaster.initAndStartAppMaster(appMaster, conf, userName); + Assert.assertEquals(stagingDir + Path.SEPARATOR + userName + Path.SEPARATOR + + ".staging", appMaster.stagingDirPath.toString()); + } +} + +class MRAppMasterTest extends MRAppMaster { + + Path stagingDirPath; + private Configuration conf; + + public MRAppMasterTest(ApplicationAttemptId applicationAttemptId) { + super(applicationAttemptId); + } + + @Override + public void init(Configuration conf) { + this.conf = conf; + } + + @Override + public void start() { + try { + String user = UserGroupInformation.getCurrentUser().getShortUserName(); + stagingDirPath = MRApps.getStagingAreaDir(conf, user); + } catch (Exception e) { + Assert.fail(e.getMessage()); + } + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java index a1eb928919..53e94db42b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java @@ -116,7 +116,7 @@ public void testSimple() throws Exception { Job mockJob = mock(Job.class); when(mockJob.getReport()).thenReturn( MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, - 0, 0, 0, 0, 0, 0)); + 0, 0, 0, 0, 0, 0, "jobfile")); MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob); @@ -193,7 +193,7 @@ public void testResource() throws Exception { Job mockJob = mock(Job.class); when(mockJob.getReport()).thenReturn( MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, - 0, 0, 0, 0, 0, 0)); + 
0, 0, 0, 0, 0, 0, "jobfile")); MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob); @@ -259,7 +259,7 @@ public void testMapReduceScheduling() throws Exception { Job mockJob = mock(Job.class); when(mockJob.getReport()).thenReturn( MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, - 0, 0, 0, 0, 0, 0)); + 0, 0, 0, 0, 0, 0, "jobfile")); MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob); @@ -373,7 +373,7 @@ void setProgress(float setupProgress, float mapProgress, public JobReport getReport() { return MRBuilderUtils.newJobReport(this.jobId, "job", "user", JobState.RUNNING, 0, 0, this.setupProgress, this.mapProgress, - this.reduceProgress, this.cleanupProgress); + this.reduceProgress, this.cleanupProgress, "jobfile"); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java index f9fa04efda..745eedcb86 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java @@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.util.Apps; import org.apache.hadoop.yarn.webapp.WebApps; import org.apache.hadoop.yarn.webapp.test.WebAppTests; import org.junit.Test; @@ -87,6 +86,7 @@ public Map getAllJobs() { return jobs; // OK } + @SuppressWarnings("rawtypes") @Override public EventHandler getEventHandler() { return null; @@ -163,6 +163,23 @@ public static Map getTaskParams(AppContext appContext) { new TestAppContext()); } + @Test public void testCountersView() { + AppContext appContext = new TestAppContext(); + Map params = getJobParams(appContext); + WebAppTests.testPage(CountersPage.class, AppContext.class, + appContext, params); + } + + @Test public void testSingleCounterView() { + AppContext appContext = new TestAppContext(); + Map params = getJobParams(appContext); + params.put(AMParams.COUNTER_GROUP, + "org.apache.hadoop.mapreduce.FileSystemCounter"); + params.put(AMParams.COUNTER_NAME, "HDFS_WRITE_OPS"); + WebAppTests.testPage(SingleCounterPage.class, AppContext.class, + appContext, params); + } + public static void main(String[] args) { WebApps.$for("yarn", AppContext.class, new TestAppContext(0, 8, 88, 4)). 
at(58888).inDevMode().start(new AMWebApp()).joinThread(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java index 9f221e6354..be6e6d9f20 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java @@ -22,6 +22,7 @@ import java.util.HashMap; import java.util.List; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.JobPriority; import org.apache.hadoop.mapred.TaskCompletionEvent; @@ -44,7 +45,7 @@ import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueState; @@ -55,7 +56,7 @@ public class TypeConverter { private static RecordFactory recordFactory; - + static { recordFactory = RecordFactoryProvider.getRecordFactory(null); } @@ -74,7 +75,7 @@ public static org.apache.hadoop.mapreduce.JobID fromYarn(ApplicationId appID) { public static JobId toYarn(org.apache.hadoop.mapreduce.JobID id) { JobId jobId = recordFactory.newRecordInstance(JobId.class); jobId.setId(id.getId()); //currently there is 1-1 mapping between appid and jobid - + ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setId(id.getId()); appId.setClusterTimestamp(toClusterTimeStamp(id.getJtIdentifier())); @@ -136,7 +137,7 @@ public static TaskAttemptState toYarn(org.apache.hadoop.mapred.TaskStatus.State } return TaskAttemptState.valueOf(state.toString()); } - + public static Phase toYarn(org.apache.hadoop.mapred.TaskStatus.Phase phase) { switch (phase) { case STARTING: @@ -160,7 +161,7 @@ public static TaskCompletionEvent[] fromYarn( TaskCompletionEvent[] oldEvents = new TaskCompletionEvent[newEvents.length]; int i = 0; - for (TaskAttemptCompletionEvent newEvent + for (TaskAttemptCompletionEvent newEvent : newEvents) { oldEvents[i++] = fromYarn(newEvent); } @@ -214,19 +215,19 @@ public static TaskAttemptId toYarn( taskAttemptId.setId(id.getId()); return taskAttemptId; } - + public static org.apache.hadoop.mapreduce.Counters fromYarn( Counters yCntrs) { if (yCntrs == null) { return null; } - org.apache.hadoop.mapreduce.Counters counters = + org.apache.hadoop.mapreduce.Counters counters = new org.apache.hadoop.mapreduce.Counters(); for (CounterGroup yGrp : yCntrs.getAllCounterGroups().values()) { counters.addGroup(yGrp.getName(), yGrp.getDisplayName()); for (Counter yCntr : yGrp.getAllCounters().values()) { - org.apache.hadoop.mapreduce.Counter c = - counters.findCounter(yGrp.getName(), + org.apache.hadoop.mapreduce.Counter c = + counters.findCounter(yGrp.getName(), yCntr.getName()); c.setValue(yCntr.getValue()); } @@ -280,29 +281,27 @@ public static Counters toYarn(org.apache.hadoop.mapreduce.Counters counters) { return yCntrs; } - public static 
org.apache.hadoop.mapred.JobStatus fromYarn( - JobReport jobreport, String jobFile) { + public static JobStatus fromYarn(JobReport jobreport, String trackingUrl) { JobPriority jobPriority = JobPriority.NORMAL; - org.apache.hadoop.mapred.JobStatus jobStatus = - new org.apache.hadoop.mapred.JobStatus(fromYarn(jobreport.getJobId()), - jobreport.getSetupProgress(), jobreport.getMapProgress(), - jobreport.getReduceProgress(), jobreport.getCleanupProgress(), - fromYarn(jobreport.getJobState()), - jobPriority, jobreport.getUser(), jobreport.getJobName(), - jobFile, jobreport.getTrackingUrl()); + JobStatus jobStatus = new org.apache.hadoop.mapred.JobStatus( + fromYarn(jobreport.getJobId()), jobreport.getSetupProgress(), jobreport + .getMapProgress(), jobreport.getReduceProgress(), jobreport + .getCleanupProgress(), fromYarn(jobreport.getJobState()), + jobPriority, jobreport.getUser(), jobreport.getJobName(), jobreport + .getJobFile(), trackingUrl); jobStatus.setFailureInfo(jobreport.getDiagnostics()); return jobStatus; } - + public static org.apache.hadoop.mapreduce.QueueState fromYarn( QueueState state) { - org.apache.hadoop.mapreduce.QueueState qState = + org.apache.hadoop.mapreduce.QueueState qState = org.apache.hadoop.mapreduce.QueueState.getState( state.toString().toLowerCase()); return qState; } - + public static int fromYarn(JobState state) { switch (state) { case NEW: @@ -340,7 +339,7 @@ public static org.apache.hadoop.mapred.TIPStatus fromYarn( } throw new YarnException("Unrecognized task state: " + state); } - + public static TaskReport fromYarn(org.apache.hadoop.mapreduce.v2.api.records.TaskReport report) { String[] diagnostics = null; if (report.getDiagnosticsList() != null) { @@ -352,14 +351,14 @@ public static TaskReport fromYarn(org.apache.hadoop.mapreduce.v2.api.records.Tas } else { diagnostics = new String[0]; } - - TaskReport rep = new TaskReport(fromYarn(report.getTaskId()), + + TaskReport rep = new TaskReport(fromYarn(report.getTaskId()), report.getProgress(), report.getTaskState().toString(), diagnostics, fromYarn(report.getTaskState()), report.getStartTime(), report.getFinishTime(), fromYarn(report.getCounters())); - List runningAtts + List runningAtts = new ArrayList(); - for (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId id + for (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId id : report.getRunningAttemptsList()) { runningAtts.add(fromYarn(id)); } @@ -369,7 +368,7 @@ diagnostics, fromYarn(report.getTaskState()), report.getStartTime(), report.getF } return rep; } - + public static List fromYarn( List taskReports) { List reports = new ArrayList(); @@ -378,14 +377,14 @@ public static List fromYarn( } return reports; } - - public static JobStatus.State fromYarn(ApplicationState state) { + + public static JobStatus.State fromYarn(YarnApplicationState state) { switch (state) { case SUBMITTED: return State.PREP; case RUNNING: return State.RUNNING; - case SUCCEEDED: + case FINISHED: return State.SUCCEEDED; case FAILED: return State.FAILED; @@ -397,7 +396,7 @@ public static JobStatus.State fromYarn(ApplicationState state) { private static final String TT_NAME_PREFIX = "tracker_"; public static TaskTrackerInfo fromYarn(NodeReport node) { - TaskTrackerInfo taskTracker = + TaskTrackerInfo taskTracker = new TaskTrackerInfo(TT_NAME_PREFIX + node.getNodeId().toString()); return taskTracker; } @@ -418,7 +417,7 @@ public static JobStatus fromYarn(ApplicationReport application, new JobStatus( TypeConverter.fromYarn(application.getApplicationId()), 0.0f, 0.0f, 0.0f, 
0.0f, - TypeConverter.fromYarn(application.getState()), + TypeConverter.fromYarn(application.getYarnApplicationState()), org.apache.hadoop.mapreduce.JobPriority.NORMAL, application.getUser(), application.getName(), application.getQueue(), jobFile, trackingUrl @@ -434,7 +433,7 @@ public static JobStatus[] fromYarnApps(List applications, List jobStatuses = new ArrayList(); for (ApplicationReport application : applications) { // each applicationReport has its own jobFile - org.apache.hadoop.mapreduce.JobID jobId = + org.apache.hadoop.mapreduce.JobID jobId = TypeConverter.fromYarn(application.getApplicationId()); jobStatuses.add(TypeConverter.fromYarn(application, MRApps.getJobFile(conf, application.getUser(), jobId))); @@ -442,14 +441,14 @@ public static JobStatus[] fromYarnApps(List applications, return jobStatuses.toArray(new JobStatus[jobStatuses.size()]); } - - public static QueueInfo fromYarn(org.apache.hadoop.yarn.api.records.QueueInfo + + public static QueueInfo fromYarn(org.apache.hadoop.yarn.api.records.QueueInfo queueInfo, Configuration conf) { return new QueueInfo(queueInfo.getQueueName(),queueInfo.toString(), fromYarn(queueInfo.getQueueState()), TypeConverter.fromYarnApps( queueInfo.getApplications(), conf)); } - + public static QueueInfo[] fromYarnQueueInfo( List queues, Configuration conf) { @@ -468,9 +467,9 @@ public static QueueAclsInfo[] fromYarnQueueUserAclsInfo( for (QueueACL qAcl : aclInfo.getUserAcls()) { operations.add(qAcl.toString()); } - - QueueAclsInfo acl = - new QueueAclsInfo(aclInfo.getQueueName(), + + QueueAclsInfo acl = + new QueueAclsInfo(aclInfo.getQueueName(), operations.toArray(new String[operations.size()])); acls.add(acl); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java index 0bfc9db3ed..87b77b7f80 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java @@ -31,6 +31,7 @@ public interface JobReport { public abstract String getJobName(); public abstract String getTrackingUrl(); public abstract String getDiagnostics(); + public abstract String getJobFile(); public abstract void setJobId(JobId jobId); public abstract void setJobState(JobState jobState); @@ -44,4 +45,5 @@ public interface JobReport { public abstract void setJobName(String jobName); public abstract void setTrackingUrl(String trackingUrl); public abstract void setDiagnostics(String diagnostics); + public abstract void setJobFile(String jobFile); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java index c5d2527a9d..2af50b6820 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java @@ -229,7 +229,19 @@ public void setDiagnostics(String diagnostics) { maybeInitBuilder(); builder.setDiagnostics(diagnostics); } + + @Override + public String getJobFile() { + JobReportProtoOrBuilder p = viaProto ? proto : builder; + return p.getJobFile(); + } + @Override + public void setJobFile(String jobFile) { + maybeInitBuilder(); + builder.setJobFile(jobFile); + } + private JobIdPBImpl convertFromProtoFormat(JobIdProto p) { return new JobIdPBImpl(p); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java index d710a6f7b8..543454c15a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java @@ -55,7 +55,7 @@ public static TaskAttemptId newTaskAttemptId(TaskId taskId, int attemptId) { public static JobReport newJobReport(JobId jobId, String jobName, String userName, JobState state, long startTime, long finishTime, float setupProgress, float mapProgress, float reduceProgress, - float cleanupProgress) { + float cleanupProgress, String jobFile) { JobReport report = Records.newRecord(JobReport.class); report.setJobId(jobId); report.setJobName(jobName); @@ -67,6 +67,7 @@ public static JobReport newJobReport(JobId jobId, String jobName, report.setCleanupProgress(cleanupProgress); report.setMapProgress(mapProgress); report.setReduceProgress(reduceProgress); + report.setJobFile(jobFile); return report; } } \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto index 29184da486..a4375c9e67 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto @@ -145,6 +145,7 @@ message JobReportProto { optional string jobName = 10; optional string trackingUrl = 11; optional string diagnostics = 12; + optional string jobFile = 13; } enum TaskAttemptCompletionEventStatusProto { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java index 1aeae987c8..43ca32020d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java @@ -21,7 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationId; -import 
org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationReportPBImpl; @@ -35,11 +35,11 @@ public class TestTypeConverter { @Test public void testFromYarn() throws Exception { int appStartTime = 612354; - ApplicationState state = ApplicationState.RUNNING; + YarnApplicationState state = YarnApplicationState.RUNNING; ApplicationId applicationId = new ApplicationIdPBImpl(); ApplicationReportPBImpl applicationReport = new ApplicationReportPBImpl(); applicationReport.setApplicationId(applicationId); - applicationReport.setState(state); + applicationReport.setYarnApplicationState(state); applicationReport.setStartTime(appStartTime); applicationReport.setUser("TestTypeConverter-user"); JobStatus jobStatus = TypeConverter.fromYarn(applicationReport, "dummy-jobfile"); @@ -56,7 +56,7 @@ public void testFromYarnApplicationReport() { ApplicationReport mockReport = mock(ApplicationReport.class); when(mockReport.getTrackingUrl()).thenReturn("dummy-tracking-url"); when(mockReport.getApplicationId()).thenReturn(mockAppId); - when(mockReport.getState()).thenReturn(ApplicationState.KILLED); + when(mockReport.getYarnApplicationState()).thenReturn(YarnApplicationState.KILLED); when(mockReport.getUser()).thenReturn("dummy-user"); when(mockReport.getQueue()).thenReturn("dummy-queue"); String jobFile = "dummy-path/job.xml"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr index 7825e6e258..baef951e53 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr @@ -64,7 +64,8 @@ {"name": "launchTime", "type": "long"}, {"name": "totalMaps", "type": "int"}, {"name": "totalReduces", "type": "int"}, - {"name": "jobStatus", "type": "string"} + {"name": "jobStatus", "type": "string"}, + {"name": "uberized", "type": "boolean"} ] }, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ContextFactory.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ContextFactory.java new file mode 100644 index 0000000000..1b1a85b7af --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ContextFactory.java @@ -0,0 +1,241 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; + +import org.apache.hadoop.conf.Configuration; + +/** + * A factory to allow applications to deal with inconsistencies between + * MapReduce Context Objects API between hadoop-0.20 and later versions. + */ +public class ContextFactory { + + private static final Constructor JOB_CONTEXT_CONSTRUCTOR; + private static final Constructor TASK_CONTEXT_CONSTRUCTOR; + private static final Constructor MAP_CONTEXT_CONSTRUCTOR; + private static final Constructor MAP_CONTEXT_IMPL_CONSTRUCTOR; + private static final boolean useV21; + + private static final Field REPORTER_FIELD; + private static final Field READER_FIELD; + private static final Field WRITER_FIELD; + private static final Field OUTER_MAP_FIELD; + private static final Field WRAPPED_CONTEXT_FIELD; + + static { + boolean v21 = true; + final String PACKAGE = "org.apache.hadoop.mapreduce"; + try { + Class.forName(PACKAGE + ".task.JobContextImpl"); + } catch (ClassNotFoundException cnfe) { + v21 = false; + } + useV21 = v21; + Class jobContextCls; + Class taskContextCls; + Class taskIOContextCls; + Class mapCls; + Class mapContextCls; + Class innerMapContextCls; + try { + if (v21) { + jobContextCls = + Class.forName(PACKAGE+".task.JobContextImpl"); + taskContextCls = + Class.forName(PACKAGE+".task.TaskAttemptContextImpl"); + taskIOContextCls = + Class.forName(PACKAGE+".task.TaskInputOutputContextImpl"); + mapContextCls = Class.forName(PACKAGE + ".task.MapContextImpl"); + mapCls = Class.forName(PACKAGE + ".lib.map.WrappedMapper"); + innerMapContextCls = + Class.forName(PACKAGE+".lib.map.WrappedMapper$Context"); + } else { + jobContextCls = + Class.forName(PACKAGE+".JobContext"); + taskContextCls = + Class.forName(PACKAGE+".TaskAttemptContext"); + taskIOContextCls = + Class.forName(PACKAGE+".TaskInputOutputContext"); + mapContextCls = Class.forName(PACKAGE + ".MapContext"); + mapCls = Class.forName(PACKAGE + ".Mapper"); + innerMapContextCls = + Class.forName(PACKAGE+".Mapper$Context"); + } + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("Can't find class", e); + } + try { + JOB_CONTEXT_CONSTRUCTOR = + jobContextCls.getConstructor(Configuration.class, JobID.class); + JOB_CONTEXT_CONSTRUCTOR.setAccessible(true); + TASK_CONTEXT_CONSTRUCTOR = + taskContextCls.getConstructor(Configuration.class, + TaskAttemptID.class); + TASK_CONTEXT_CONSTRUCTOR.setAccessible(true); + if (useV21) { + MAP_CONTEXT_CONSTRUCTOR = + innerMapContextCls.getConstructor(mapCls, + MapContext.class); + MAP_CONTEXT_IMPL_CONSTRUCTOR = + mapContextCls.getDeclaredConstructor(Configuration.class, + TaskAttemptID.class, + RecordReader.class, + RecordWriter.class, + OutputCommitter.class, + StatusReporter.class, + InputSplit.class); + MAP_CONTEXT_IMPL_CONSTRUCTOR.setAccessible(true); + WRAPPED_CONTEXT_FIELD = + innerMapContextCls.getDeclaredField("mapContext"); + WRAPPED_CONTEXT_FIELD.setAccessible(true); + } else { + MAP_CONTEXT_CONSTRUCTOR = + innerMapContextCls.getConstructor(mapCls, + Configuration.class, + TaskAttemptID.class, + RecordReader.class, + RecordWriter.class, + OutputCommitter.class, + StatusReporter.class, + InputSplit.class); + MAP_CONTEXT_IMPL_CONSTRUCTOR = null; + WRAPPED_CONTEXT_FIELD = null; + } + 
MAP_CONTEXT_CONSTRUCTOR.setAccessible(true); + REPORTER_FIELD = taskIOContextCls.getDeclaredField("reporter"); + REPORTER_FIELD.setAccessible(true); + READER_FIELD = mapContextCls.getDeclaredField("reader"); + READER_FIELD.setAccessible(true); + WRITER_FIELD = taskIOContextCls.getDeclaredField("output"); + WRITER_FIELD.setAccessible(true); + OUTER_MAP_FIELD = innerMapContextCls.getDeclaredField("this$0"); + OUTER_MAP_FIELD.setAccessible(true); + } catch (SecurityException e) { + throw new IllegalArgumentException("Can't run constructor ", e); + } catch (NoSuchMethodException e) { + throw new IllegalArgumentException("Can't find constructor ", e); + } catch (NoSuchFieldException e) { + throw new IllegalArgumentException("Can't find field ", e); + } + } + + /** + * Clone a job or task attempt context with a new configuration. + * @param original the original context + * @param conf the new configuration + * @return a new context object + * @throws InterruptedException + * @throws IOException + */ + @SuppressWarnings("unchecked") + public static JobContext cloneContext(JobContext original, + Configuration conf + ) throws IOException, + InterruptedException { + try { + if (original instanceof MapContext) { + return cloneMapContext((Mapper.Context) original, conf, null, null); + } else if (original instanceof ReduceContext) { + throw new IllegalArgumentException("can't clone ReduceContext"); + } else if (original instanceof TaskAttemptContext) { + TaskAttemptContext spec = (TaskAttemptContext) original; + return (JobContext) + TASK_CONTEXT_CONSTRUCTOR.newInstance(conf, spec.getTaskAttemptID()); + } else { + return (JobContext) + JOB_CONTEXT_CONSTRUCTOR.newInstance(conf, original.getJobID()); + } + } catch (InstantiationException e) { + throw new IllegalArgumentException("Can't clone object", e); + } catch (IllegalAccessException e) { + throw new IllegalArgumentException("Can't clone object", e); + } catch (InvocationTargetException e) { + throw new IllegalArgumentException("Can't clone object", e); + } + } + + /** + * Copy a mapper context, optionally replacing the input and output. + * @param input key type + * @param input value type + * @param output key type + * @param output value type + * @param context the context to clone + * @param conf a new configuration + * @param reader Reader to read from. Null means to clone from context. + * @param writer Writer to write to. Null means to clone from context. + * @return a new context. it will not be the same class as the original. 
+ * @throws IOException + * @throws InterruptedException + */ + @SuppressWarnings("unchecked") + public static Mapper.Context + cloneMapContext(MapContext context, + Configuration conf, + RecordReader reader, + RecordWriter writer + ) throws IOException, InterruptedException { + try { + // get the outer object pointer + Object outer = OUTER_MAP_FIELD.get(context); + // if it is a wrapped 21 context, unwrap it + if ("org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context".equals + (context.getClass().getName())) { + context = (MapContext) WRAPPED_CONTEXT_FIELD.get(context); + } + // if the reader or writer aren't given, use the same ones + if (reader == null) { + reader = (RecordReader) READER_FIELD.get(context); + } + if (writer == null) { + writer = (RecordWriter) WRITER_FIELD.get(context); + } + if (useV21) { + Object basis = + MAP_CONTEXT_IMPL_CONSTRUCTOR.newInstance(conf, + context.getTaskAttemptID(), + reader, writer, + context.getOutputCommitter(), + REPORTER_FIELD.get(context), + context.getInputSplit()); + return (Mapper.Context) + MAP_CONTEXT_CONSTRUCTOR.newInstance(outer, basis); + } else { + return (Mapper.Context) + MAP_CONTEXT_CONSTRUCTOR.newInstance(outer, + conf, context.getTaskAttemptID(), + reader, writer, + context.getOutputCommitter(), + REPORTER_FIELD.get(context), + context.getInputSplit()); + } + } catch (IllegalAccessException e) { + throw new IllegalArgumentException("Can't access field", e); + } catch (InstantiationException e) { + throw new IllegalArgumentException("Can't create object", e); + } catch (InvocationTargetException e) { + throw new IllegalArgumentException("Can't invoke constructor", e); + } + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java index f616df80b8..530aee1999 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java @@ -462,8 +462,6 @@ public String toString() { sb.append(status.getReduceProgress()).append("\n"); sb.append("Job state: "); sb.append(status.getState()).append("\n"); - sb.append("history URL: "); - sb.append(status.getHistoryFile()).append("\n"); sb.append("retired: ").append(status.isRetired()).append("\n"); sb.append("reason for failure: ").append(reasonforFailure); return sb.toString(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index accfdddc3d..a3e5a6cf61 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -473,4 +473,6 @@ public interface MRJobConfig { public static final String MAPREDUCE_V2_CHILD_CLASS = "org.apache.hadoop.mapred.YarnChild"; + public static final String APPLICATION_ATTEMPT_ID = + "mapreduce.job.application.attempt.id"; } diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/OutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/OutputCommitter.java index 22ff59aa11..819c32baa9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/OutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/OutputCommitter.java @@ -143,4 +143,35 @@ public abstract void commitTask(TaskAttemptContext taskContext) */ public abstract void abortTask(TaskAttemptContext taskContext) throws IOException; + + /** + * Is task output recovery supported for restarting jobs? + * + * If task output recovery is supported, job restart can be done more + * efficiently. + * + * @return true if task output recovery is supported, + * false otherwise + * @see #recoverTask(TaskAttemptContext) + */ + public boolean isRecoverySupported() { + return false; + } + + /** + * Recover the task output. + * + * The retry-count for the job will be passed via the + * {@link MRJobConfig#APPLICATION_ATTEMPT_ID} key in + * {@link TaskAttemptContext#getConfiguration()} for the + * OutputCommitter. + * + * If an exception is thrown the task will be attempted again. + * + * @param taskContext Context of the task whose output is being recovered + * @throws IOException + */ + public void recoverTask(TaskAttemptContext taskContext) + throws IOException + {} } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java index e27f72d008..fe92cfe376 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java @@ -302,6 +302,7 @@ private void handleJobInitedEvent(JobInitedEvent event) { info.launchTime = event.getLaunchTime(); info.totalMaps = event.getTotalMaps(); info.totalReduces = event.getTotalReduces(); + info.uberized = event.getUberized(); } private void handleJobInfoChangeEvent(JobInfoChangeEvent event) { @@ -346,6 +347,7 @@ public static class JobInfo { Map jobACLs; Map tasksMap; + boolean uberized; /** Create a job info object where job information will be stored * after a parse @@ -373,7 +375,8 @@ public void printAll() { System.out.println("MAP_COUNTERS:" + mapCounters.toString()); System.out.println("REDUCE_COUNTERS:" + reduceCounters.toString()); System.out.println("TOTAL_COUNTERS: " + totalCounters.toString()); - + System.out.println("UBERIZED: " + uberized); + for (TaskInfo ti: tasksMap.values()) { ti.printAll(); } @@ -421,6 +424,8 @@ public void printAll() { /** @return the priority of this job */ public String getPriority() { return priority.toString(); } public Map getJobACLs() { return jobACLs; } + /** @return the uberized status of this job */ + public boolean getUberized() { return uberized; } } /** diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobInitedEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobInitedEvent.java index a1fa9c1b26..099941ec1f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobInitedEvent.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobInitedEvent.java @@ -42,14 +42,16 @@ public class JobInitedEvent implements HistoryEvent { * @param totalMaps * @param totalReduces * @param jobStatus + * @param uberized True if the job's map and reduce stages were combined */ public JobInitedEvent(JobID id, long launchTime, int totalMaps, - int totalReduces, String jobStatus) { + int totalReduces, String jobStatus, boolean uberized) { datum.jobid = new Utf8(id.toString()); datum.launchTime = launchTime; datum.totalMaps = totalMaps; datum.totalReduces = totalReduces; datum.jobStatus = new Utf8(jobStatus); + datum.uberized = uberized; } JobInitedEvent() { } @@ -67,9 +69,10 @@ public JobInitedEvent(JobID id, long launchTime, int totalMaps, public int getTotalReduces() { return datum.totalReduces; } /** Get the status */ public String getStatus() { return datum.jobStatus.toString(); } - /** Get the event type */ + /** Get the event type */ public EventType getEventType() { return EventType.JOB_INITED; } - + /** Get whether the job's map and reduce stages were combined */ + public boolean getUberized() { return datum.uberized; } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java index 3fe4354b17..26390c7df2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java @@ -35,7 +35,6 @@ import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.util.StringUtils; /** An {@link OutputCommitter} that commits files specified * in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}. 
@@ -69,9 +68,8 @@ public FileOutputCommitter(Path outputPath, this.outputPath = outputPath; outputFileSystem = outputPath.getFileSystem(context.getConfiguration()); workPath = new Path(outputPath, - (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + - "_" + context.getTaskAttemptID().toString() - )).makeQualified(outputFileSystem); + getTaskAttemptBaseDirName(context)) + .makeQualified(outputFileSystem); } } @@ -82,7 +80,8 @@ public FileOutputCommitter(Path outputPath, */ public void setupJob(JobContext context) throws IOException { if (outputPath != null) { - Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME); + Path tmpDir = new Path(outputPath, getJobAttemptBaseDirName(context) + + Path.SEPARATOR + FileOutputCommitter.TEMP_DIR_NAME); FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration()); if (!fileSys.mkdirs(tmpDir)) { LOG.error("Mkdirs failed to create " + tmpDir.toString()); @@ -106,11 +105,27 @@ private void markOutputDirSuccessful(MRJobConfig context) throws IOException { } /** + * Move all job output to the final place. * Delete the temporary directory, including all of the work directories. * Create a _SUCCESS file to make it as successful. * @param context the job's context */ public void commitJob(JobContext context) throws IOException { + //delete the task temp directory from the current jobtempdir + Path tmpDir = new Path(outputPath, getJobAttemptBaseDirName(context) + + Path.SEPARATOR + FileOutputCommitter.TEMP_DIR_NAME); + FileSystem fileSys = tmpDir.getFileSystem(context.getConfiguration()); + if (fileSys.exists(tmpDir)) { + fileSys.delete(tmpDir, true); + } else { + LOG.warn("Task temp dir could not be deleted " + tmpDir); + } + + //move the job output to final place + Path jobOutputPath = + new Path(outputPath, getJobAttemptBaseDirName(context)); + moveJobOutputs(outputFileSystem, outputPath, jobOutputPath); + // delete the _temporary folder and create a _done file in the o/p folder cleanupJob(context); if (shouldMarkOutputDir(context.getConfiguration())) { @@ -118,6 +133,31 @@ public void commitJob(JobContext context) throws IOException { } } + private void moveJobOutputs(FileSystem fs, + Path finalOutputDir, Path jobOutput) throws IOException { + if (fs.isFile(jobOutput)) { + Path finalOutputPath = getFinalPath(finalOutputDir, jobOutput, jobOutput); + if (!fs.rename(jobOutput, finalOutputPath)) { + if (!fs.delete(finalOutputPath, true)) { + throw new IOException("Failed to delete earlier output of job"); + } + if (!fs.rename(jobOutput, finalOutputPath)) { + throw new IOException("Failed to save output of job"); + } + } + LOG.debug("Moved " + jobOutput + " to " + finalOutputPath); + } else if (fs.getFileStatus(jobOutput).isDirectory()) { + FileStatus[] paths = fs.listStatus(jobOutput); + Path finalOutputPath = getFinalPath(finalOutputDir, jobOutput, jobOutput); + fs.mkdirs(finalOutputPath); + if (paths != null) { + for (FileStatus path : paths) { + moveJobOutputs(fs, finalOutputDir, path.getPath()); + } + } + } + } + @Override @Deprecated public void cleanupJob(JobContext context) throws IOException { @@ -163,8 +203,10 @@ public void commitTask(TaskAttemptContext context) if (workPath != null) { context.progress(); if (outputFileSystem.exists(workPath)) { - // Move the task outputs to their final place - moveTaskOutputs(context, outputFileSystem, outputPath, workPath); + // Move the task outputs to the current job attempt output dir + Path jobOutputPath = + new Path(outputPath, getJobAttemptBaseDirName(context)); + 
moveTaskOutputs(context, outputFileSystem, jobOutputPath, workPath); // Delete the temporary task-specific output directory if (!outputFileSystem.delete(workPath, true)) { LOG.warn("Failed to delete the temporary output" + @@ -271,4 +313,50 @@ public boolean needsTaskCommit(TaskAttemptContext context public Path getWorkPath() throws IOException { return workPath; } + + @Override + public boolean isRecoverySupported() { + return true; + } + + @Override + public void recoverTask(TaskAttemptContext context) + throws IOException { + context.progress(); + Path jobOutputPath = + new Path(outputPath, getJobAttemptBaseDirName(context)); + int previousAttempt = + context.getConfiguration().getInt( + MRJobConfig.APPLICATION_ATTEMPT_ID, 0) - 1; + if (previousAttempt < 0) { + throw new IOException ("Cannot recover task output for first attempt..."); + } + + Path pathToRecover = + new Path(outputPath, getJobAttemptBaseDirName(previousAttempt)); + if (outputFileSystem.exists(pathToRecover)) { + // Move the task outputs to their final place + moveJobOutputs(outputFileSystem, jobOutputPath, pathToRecover); + LOG.info("Saved output of job to " + jobOutputPath); + } + } + + protected static String getJobAttemptBaseDirName(JobContext context) { + int appAttemptId = + context.getConfiguration().getInt( + MRJobConfig.APPLICATION_ATTEMPT_ID, 0); + return getJobAttemptBaseDirName(appAttemptId); + } + + protected static String getJobAttemptBaseDirName(int appAttemptId) { + return FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + + + appAttemptId; + } + + protected static String getTaskAttemptBaseDirName( + TaskAttemptContext context) { + return getJobAttemptBaseDirName(context) + Path.SEPARATOR + + FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + + "_" + context.getTaskAttemptID().toString(); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java index 95f1fccfb7..b6e44d71c4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java @@ -114,15 +114,15 @@ SplitMetaInfo[] writeNewSplits(Configuration conf, if (array.length != 0) { SerializationFactory factory = new SerializationFactory(conf); int i = 0; - long offset = out.size(); + long offset = out.getPos(); for(T split: array) { - int prevCount = out.size(); + long prevCount = out.getPos(); Text.writeString(out, split.getClass().getName()); Serializer serializer = factory.getSerializer((Class) split.getClass()); serializer.open(out); serializer.serialize(split); - int currCount = out.size(); + long currCount = out.getPos(); info[i++] = new JobSplit.SplitMetaInfo( split.getLocations(), offset, @@ -139,12 +139,12 @@ private static SplitMetaInfo[] writeOldSplits( SplitMetaInfo[] info = new SplitMetaInfo[splits.length]; if (splits.length != 0) { int i = 0; - long offset = out.size(); + long offset = out.getPos(); for(org.apache.hadoop.mapred.InputSplit split: splits) { - int prevLen = out.size(); + long prevLen = out.getPos(); Text.writeString(out, split.getClass().getName()); split.write(out); - int currLen = out.size(); + long currLen = out.getPos(); 
info[i++] = new JobSplit.SplitMetaInfo( split.getLocations(), offset, split.getLength()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index f0d534da2a..496de9b44b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1174,7 +1174,7 @@ yarn.app.mapreduce.am.staging-dir - /tmp/hadoop-yarn/${user.name}/staging + /tmp/hadoop-yarn/staging The staging dir used while submitting jobs. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java new file mode 100644 index 0000000000..6e8941bd7e --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java @@ -0,0 +1,290 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.lib.output; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.net.URI; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.JobStatus; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.task.JobContextImpl; +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; + +@SuppressWarnings("unchecked") +public class TestFileOutputCommitter extends TestCase { + private static Path outDir = new Path(System.getProperty("test.build.data", + "/tmp"), "output"); + + // A random task attempt id for testing. 
+ private static String attempt = "attempt_200707121733_0001_m_000000_0"; + private static String partFile = "part-m-00000"; + private static TaskAttemptID taskID = TaskAttemptID.forName(attempt); + private Text key1 = new Text("key1"); + private Text key2 = new Text("key2"); + private Text val1 = new Text("val1"); + private Text val2 = new Text("val2"); + + + private void writeOutput(RecordWriter theRecordWriter, + TaskAttemptContext context) throws IOException, InterruptedException { + NullWritable nullWritable = NullWritable.get(); + + try { + theRecordWriter.write(key1, val1); + theRecordWriter.write(null, nullWritable); + theRecordWriter.write(null, val1); + theRecordWriter.write(nullWritable, val2); + theRecordWriter.write(key2, nullWritable); + theRecordWriter.write(key1, null); + theRecordWriter.write(null, null); + theRecordWriter.write(key2, val2); + } finally { + theRecordWriter.close(context); + } + } + + + public void testRecovery() throws Exception { + Job job = Job.getInstance(); + FileOutputFormat.setOutputPath(job, outDir); + Configuration conf = job.getConfiguration(); + conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); + conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1); + JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); + TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); + FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); + + // setup + committer.setupJob(jContext); + committer.setupTask(tContext); + + // write output + TextOutputFormat theOutputFormat = new TextOutputFormat(); + RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); + writeOutput(theRecordWriter, tContext); + + // do commit + committer.commitTask(tContext); + Path jobTempDir1 = new Path(outDir, + FileOutputCommitter.getJobAttemptBaseDirName( + conf.getInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0))); + assertTrue((new File(jobTempDir1.toString()).exists())); + validateContent(jobTempDir1); + + + + //now while running the second app attempt, + //recover the task output from first attempt + Configuration conf2 = job.getConfiguration(); + conf2.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); + conf2.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 2); + JobContext jContext2 = new JobContextImpl(conf2, taskID.getJobID()); + TaskAttemptContext tContext2 = new TaskAttemptContextImpl(conf2, taskID); + FileOutputCommitter committer2 = new FileOutputCommitter(outDir, tContext2); + committer.setupJob(tContext2); + Path jobTempDir2 = new Path(outDir, + FileOutputCommitter.getJobAttemptBaseDirName( + conf2.getInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0))); + assertTrue((new File(jobTempDir2.toString()).exists())); + + tContext2.getConfiguration().setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 2); + committer2.recoverTask(tContext2); + validateContent(jobTempDir2); + + committer2.commitJob(jContext2); + validateContent(outDir); + FileUtil.fullyDelete(new File(outDir.toString())); + } + + private void validateContent(Path dir) throws IOException { + File expectedFile = new File(new Path(dir, partFile).toString()); + StringBuffer expectedOutput = new StringBuffer(); + expectedOutput.append(key1).append('\t').append(val1).append("\n"); + expectedOutput.append(val1).append("\n"); + expectedOutput.append(val2).append("\n"); + expectedOutput.append(key2).append("\n"); + expectedOutput.append(key1).append("\n"); + expectedOutput.append(key2).append('\t').append(val2).append("\n"); + String output = slurp(expectedFile); + assertEquals(output, 
expectedOutput.toString()); + } + + + public void testCommitter() throws Exception { + Job job = Job.getInstance(); + FileOutputFormat.setOutputPath(job, outDir); + Configuration conf = job.getConfiguration(); + conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); + JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); + TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); + FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); + + // setup + committer.setupJob(jContext); + committer.setupTask(tContext); + + // write output + TextOutputFormat theOutputFormat = new TextOutputFormat(); + RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); + writeOutput(theRecordWriter, tContext); + + // do commit + committer.commitTask(tContext); + committer.commitJob(jContext); + + // validate output + validateContent(outDir); + FileUtil.fullyDelete(new File(outDir.toString())); + } + + + public void testAbort() throws IOException, InterruptedException { + Job job = Job.getInstance(); + FileOutputFormat.setOutputPath(job, outDir); + Configuration conf = job.getConfiguration(); + conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); + JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); + TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); + FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); + + // do setup + committer.setupJob(jContext); + committer.setupTask(tContext); + + // write output + TextOutputFormat theOutputFormat = new TextOutputFormat(); + RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); + writeOutput(theRecordWriter, tContext); + + // do abort + committer.abortTask(tContext); + File expectedFile = new File(new Path(committer.getWorkPath(), partFile) + .toString()); + assertFalse("task temp dir still exists", expectedFile.exists()); + + committer.abortJob(jContext, JobStatus.State.FAILED); + expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME) + .toString()); + assertFalse("job temp dir still exists", expectedFile.exists()); + assertEquals("Output directory not empty", 0, new File(outDir.toString()) + .listFiles().length); + FileUtil.fullyDelete(new File(outDir.toString())); + } + + public static class FakeFileSystem extends RawLocalFileSystem { + public FakeFileSystem() { + super(); + } + + public URI getUri() { + return URI.create("faildel:///"); + } + + @Override + public boolean delete(Path p, boolean recursive) throws IOException { + throw new IOException("fake delete failed"); + } + } + + + public void testFailAbort() throws IOException, InterruptedException { + Job job = Job.getInstance(); + Configuration conf = job.getConfiguration(); + conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///"); + conf.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class); + conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); + conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1); + FileOutputFormat.setOutputPath(job, outDir); + JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); + TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); + FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); + + // do setup + committer.setupJob(jContext); + committer.setupTask(tContext); + + // write output + TextOutputFormat theOutputFormat = new TextOutputFormat(); + RecordWriter theRecordWriter = theOutputFormat + .getRecordWriter(tContext); + writeOutput(theRecordWriter, tContext); + + // do 
abort + Throwable th = null; + try { + committer.abortTask(tContext); + } catch (IOException ie) { + th = ie; + } + assertNotNull(th); + assertTrue(th instanceof IOException); + assertTrue(th.getMessage().contains("fake delete failed")); + File jobTmpDir = new File(new Path(outDir, + FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + + conf.getInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0) + + Path.SEPARATOR + + FileOutputCommitter.TEMP_DIR_NAME).toString()); + File taskTmpDir = new File(jobTmpDir, "_" + taskID); + File expectedFile = new File(taskTmpDir, partFile); + assertTrue(expectedFile + " does not exists", expectedFile.exists()); + + th = null; + try { + committer.abortJob(jContext, JobStatus.State.FAILED); + } catch (IOException ie) { + th = ie; + } + assertNotNull(th); + assertTrue(th instanceof IOException); + assertTrue(th.getMessage().contains("fake delete failed")); + assertTrue("job temp dir does not exists", jobTmpDir.exists()); + FileUtil.fullyDelete(new File(outDir.toString())); + } + + public static String slurp(File f) throws IOException { + int len = (int) f.length(); + byte[] buf = new byte[len]; + FileInputStream in = new FileInputStream(f); + String contents = null; + try { + in.read(buf, 0, len); + contents = new String(buf, "UTF-8"); + } finally { + in.close(); + } + return contents; + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java index f51846fb92..8f7ebd56e7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java @@ -48,6 +48,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; +import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.YarnException; @@ -96,9 +97,11 @@ public CompletedJob(Configuration conf, JobId jobId, Path historyFile, report.setFinishTime(jobInfo.getFinishTime()); report.setJobName(jobInfo.getJobname()); report.setUser(jobInfo.getUsername()); - //TODO Possibly populate job progress. Never used. 
- //report.setMapProgress(progress) - //report.setReduceProgress(progress) + report.setMapProgress((float) getCompletedMaps() / getTotalMaps()); + report.setReduceProgress((float) getCompletedReduces() / getTotalReduces()); + report.setJobFile(confFile.toString()); + report.setTrackingUrl(JobHistoryUtils.getHistoryUrl(conf, TypeConverter + .toYarn(TypeConverter.fromYarn(jobId)).getAppId())); } @Override @@ -287,8 +290,7 @@ public int getTotalReduces() { @Override public boolean isUber() { - LOG.warn("isUber is not yet implemented"); - return false; + return jobInfo.getUberized(); } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java index e86eb279e9..35bf9696c2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java @@ -101,12 +101,9 @@ public HistoryClientService(HistoryContext history) { } public void start() { - YarnRPC rpc = YarnRPC.create(getConfig()); - Configuration conf = new Configuration(getConfig()); - conf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - ClientHSSecurityInfo.class, SecurityInfo.class); - initializeWebApp(getConfig()); + Configuration conf = getConfig(); + YarnRPC rpc = YarnRPC.create(conf); + initializeWebApp(conf); String serviceAddr = conf.get(JHAdminConfig.MR_HISTORY_ADDRESS, JHAdminConfig.DEFAULT_MR_HISTORY_ADDRESS); InetSocketAddress address = NetUtils.createSocketAddr(serviceAddr); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsController.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsController.java index e14d2e8fb8..ac9f53477a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsController.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsController.java @@ -18,6 +18,8 @@ package org.apache.hadoop.mapreduce.v2.hs.webapp; +import java.io.IOException; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.v2.app.webapp.App; import org.apache.hadoop.mapreduce.v2.app.webapp.AppController; @@ -57,7 +59,7 @@ protected Class jobPage() { * @see org.apache.hadoop.mapreduce.v2.app.webapp.AppController#countersPage() */ @Override - protected Class countersPage() { + public Class countersPage() { return HsCountersPage.class; } @@ -108,7 +110,16 @@ public void job() { public void jobCounters() { super.jobCounters(); } - + + /* + * (non-Javadoc) + * @see org.apache.hadoop.mapreduce.v2.app.webapp.AppController#taskCounters() + */ + @Override + public void taskCounters() { + super.taskCounters(); + } + /* * (non-Javadoc) * @see org.apache.hadoop.mapreduce.v2.app.webapp.AppController#tasks() @@ -157,4 +168,31 @@ protected Class aboutPage() { public void about() { render(aboutPage()); } + + /* + * (non-Javadoc) + * @see 
org.apache.hadoop.mapreduce.v2.app.webapp.AppController#singleCounterPage() + */ + @Override + protected Class singleCounterPage() { + return HsSingleCounterPage.class; + } + + /* + * (non-Javadoc) + * @see org.apache.hadoop.mapreduce.v2.app.webapp.AppController#singleJobCounter() + */ + @Override + public void singleJobCounter() throws IOException{ + super.singleJobCounter(); + } + + /* + * (non-Javadoc) + * @see org.apache.hadoop.mapreduce.v2.app.webapp.AppController#singleTaskCounter() + */ + @Override + public void singleTaskCounter() throws IOException{ + super.singleTaskCounter(); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsCountersPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsCountersPage.java index 0840f91357..1bae8fd8af 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsCountersPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsCountersPage.java @@ -18,11 +18,12 @@ package org.apache.hadoop.mapreduce.v2.hs.webapp; +import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.TASK_ID; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; + import org.apache.hadoop.mapreduce.v2.app.webapp.CountersBlock; import org.apache.hadoop.yarn.webapp.SubView; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; - /** * Render the counters page */ @@ -34,7 +35,12 @@ public class HsCountersPage extends HsView { */ @Override protected void preHead(Page.HTML<_> html) { commonPreHead(html); - set(initID(ACCORDION, "nav"), "{autoHeight:false, active:1}"); + String tid = $(TASK_ID); + String activeNav = "2"; + if(tid == null || tid.isEmpty()) { + activeNav = "1"; + } + set(initID(ACCORDION, "nav"), "{autoHeight:false, active:"+activeNav+"}"); set(DATATABLES_SELECTOR, "#counters .dt-counters"); set(initSelector(DATATABLES), "{bJQueryUI:true, sDom:'t', iDisplayLength:-1}"); @@ -47,9 +53,9 @@ public class HsCountersPage extends HsView { @Override protected void postHead(Page.HTML<_> html) { html. style("#counters, .dt-counters { table-layout: fixed }", - "#counters th { overflow: hidden; vertical-align: center }", + "#counters th { overflow: hidden; vertical-align: middle }", "#counters .dataTables_wrapper { min-height: 1em }", - "#counters .group { width: 10em }", + "#counters .group { width: 15em }", "#counters .name { width: 30em }"); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsNavBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsNavBlock.java index c5e7ed7c79..8d3ccff63d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsNavBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsNavBlock.java @@ -55,6 +55,14 @@ public class HsNavBlock extends HtmlBlock { li().a(url("conf", jobid), "Configuration")._(). li().a(url("tasks", jobid, "m"), "Map tasks")._(). 
li().a(url("tasks", jobid, "r"), "Reduce tasks")._()._(); + if (app.getTask() != null) { + String taskid = MRApps.toString(app.getTask().getID()); + nav. + h3("Task"). + ul(). + li().a(url("task", taskid), "Task Overview")._(). + li().a(url("taskcounters", taskid), "Counters")._()._(); + } } nav. h3("Tools"). diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsSingleCounterPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsSingleCounterPage.java new file mode 100644 index 0000000000..4e0036a650 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsSingleCounterPage.java @@ -0,0 +1,69 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.mapreduce.v2.hs.webapp; + +import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.TASK_ID; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; + +import org.apache.hadoop.mapreduce.v2.app.webapp.SingleCounterBlock; +import org.apache.hadoop.yarn.webapp.SubView; + +/** + * Render the counters page + */ +public class HsSingleCounterPage extends HsView { + + /* + * (non-Javadoc) + * @see org.apache.hadoop.mapreduce.v2.hs.webapp.HsView#preHead(org.apache.hadoop.yarn.webapp.hamlet.Hamlet.HTML) + */ + @Override protected void preHead(Page.HTML<_> html) { + commonPreHead(html); + String tid = $(TASK_ID); + String activeNav = "2"; + if(tid == null || tid.isEmpty()) { + activeNav = "1"; + } + set(initID(ACCORDION, "nav"), "{autoHeight:false, active:"+activeNav+"}"); + set(DATATABLES_ID, "singleCounter"); + set(initID(DATATABLES, "singleCounter"), counterTableInit()); + setTableStyles(html, "singleCounter"); + } + + /** + * @return The end of a javascript map that is the jquery datatable + * configuration for the jobs table. the Jobs table is assumed to be + * rendered by the class returned from {@link #content()} + */ + private String counterTableInit() { + return tableInit(). + append(", aoColumnDefs:["). + append("{'sType':'title-numeric', 'aTargets': [ 1 ] }"). + append("]}"). + toString(); + } + + /** + * The content of this page is the CountersBlock now. 
+ * @return CountersBlock.class + */ + @Override protected Class content() { + return SingleCounterBlock.class; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java index 5a86310dd4..5488120229 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java @@ -250,7 +250,7 @@ protected Collection getTaskAttempts() { @Override protected void preHead(Page.HTML<_> html) { commonPreHead(html); //override the nav config from commonPReHead - set(initID(ACCORDION, "nav"), "{autoHeight:false, active:1}"); + set(initID(ACCORDION, "nav"), "{autoHeight:false, active:2}"); //Set up the java script and CSS for the attempts table set(DATATABLES_ID, "attempts"); set(initID(DATATABLES, "attempts"), attemptsTableInit()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebApp.java index 009e20f010..8e6a135c4a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebApp.java @@ -41,10 +41,15 @@ public void setup() { route(pajoin("/job", JOB_ID), HsController.class, "job"); route(pajoin("/conf", JOB_ID), HsController.class, "conf"); route(pajoin("/jobcounters", JOB_ID), HsController.class, "jobCounters"); + route(pajoin("/singlejobcounter",JOB_ID, COUNTER_GROUP, COUNTER_NAME), + HsController.class, "singleJobCounter"); route(pajoin("/tasks", JOB_ID, TASK_TYPE), HsController.class, "tasks"); route(pajoin("/attempts", JOB_ID, TASK_TYPE, ATTEMPT_STATE), HsController.class, "attempts"); route(pajoin("/task", TASK_ID), HsController.class, "task"); + route(pajoin("/taskcounters", TASK_ID), HsController.class, "taskCounters"); + route(pajoin("/singletaskcounter",TASK_ID, COUNTER_GROUP, COUNTER_NAME), + HsController.class, "singleTaskCounter"); route("/about", HsController.class, "about"); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java index 6a5a57fa1c..64fd88559c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java @@ -95,6 +95,8 @@ public void testHistoryParsing() throws Exception { 2, jobInfo.getFinishedMaps()); Assert.assertEquals("incorrect finishedReduces ", 1, jobInfo.getFinishedReduces()); + 
Assert.assertEquals("incorrect uberized ", + job.isUber(), jobInfo.getUberized()); int totalTasks = jobInfo.getAllTasks().size(); Assert.assertEquals("total number of tasks is incorrect ", 3, totalTasks); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHSWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHSWebApp.java index 32eaf5d719..c0b944fab5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHSWebApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHSWebApp.java @@ -26,17 +26,13 @@ import java.util.HashMap; import java.util.Map; -import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.mapreduce.v2.api.records.JobId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; -import org.apache.hadoop.mapreduce.v2.app.job.Task; -import org.apache.hadoop.mapreduce.v2.app.webapp.AMParams; import org.apache.hadoop.mapreduce.v2.app.webapp.TestAMWebApp; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -92,6 +88,7 @@ public Map getAllJobs() { return jobs; // OK } + @SuppressWarnings("rawtypes") @Override public EventHandler getEventHandler() { return null; @@ -171,4 +168,16 @@ public void testTaskView() { WebAppTests.testPage(HsConfPage.class, AppContext.class, new TestAppContext()); } + + @Test public void testAboutView() { + LOG.info("HsAboutPage"); + WebAppTests.testPage(HsAboutPage.class, AppContext.class, + new TestAppContext()); + } + + @Test public void testSingleCounterView() { + LOG.info("HsSingleCounterPage"); + WebAppTests.testPage(HsSingleCounterPage.class, AppContext.class, + new TestAppContext()); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java index 20c6ce7c00..24c13e745d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java @@ -80,17 +80,14 @@ private MRClientProtocol instantiateHistoryProxy() return null; } LOG.info("Connecting to HistoryServer at: " + serviceAddr); - final Configuration myConf = new Configuration(conf); - myConf.setClass(YarnConfiguration.YARN_SECURITY_INFO, - ClientHSSecurityInfo.class, SecurityInfo.class); - final YarnRPC rpc = YarnRPC.create(myConf); + final YarnRPC rpc = YarnRPC.create(conf); LOG.info("Connected to HistoryServer at: " + serviceAddr); UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); return currentUser.doAs(new PrivilegedAction() { @Override public MRClientProtocol run() { return (MRClientProtocol) rpc.getProxy(MRClientProtocol.class, - 
NetUtils.createSocketAddr(serviceAddr), myConf); + NetUtils.createSocketAddr(serviceAddr), conf); } }); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java index 341e17e951..20817af8e1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java @@ -21,10 +21,12 @@ import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.net.InetSocketAddress; import java.security.PrivilegedAction; import java.util.HashMap; import java.util.List; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -60,7 +62,7 @@ import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -86,8 +88,9 @@ public class ClientServiceDelegate { private MRClientProtocol realProxy = null; private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); private static String UNKNOWN_USER = "Unknown User"; + private String trackingUrl; - public ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, + public ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, JobID jobId, MRClientProtocol historyServerProxy) { this.conf = new Configuration(conf); // Cloning for modifying. // For faster redirects from AM to HS. @@ -101,7 +104,7 @@ public ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, // Get the instance of the NotRunningJob corresponding to the specified // user and state - private NotRunningJob getNotRunningJob(ApplicationReport applicationReport, + private NotRunningJob getNotRunningJob(ApplicationReport applicationReport, JobState state) { synchronized (notRunningJobs) { HashMap map = notRunningJobs.get(state); @@ -109,8 +112,8 @@ private NotRunningJob getNotRunningJob(ApplicationReport applicationReport, map = new HashMap(); notRunningJobs.put(state, map); } - String user = - (applicationReport == null) ? + String user = + (applicationReport == null) ? UNKNOWN_USER : applicationReport.getUser(); NotRunningJob notRunningJob = map.get(user); if (notRunningJob == null) { @@ -129,8 +132,11 @@ private MRClientProtocol getProxy() throws YarnRemoteException { // Possibly allow nulls through the PB tunnel, otherwise deal with an exception // and redirect to the history server. 
ApplicationReport application = rm.getApplicationReport(appId); + if (application != null) { + trackingUrl = application.getTrackingUrl(); + } String serviceAddr = null; - while (application == null || ApplicationState.RUNNING.equals(application.getState())) { + while (application == null || YarnApplicationState.RUNNING.equals(application.getYarnApplicationState())) { if (application == null) { LOG.info("Could not get Job info from RM for job " + jobId + ". Redirecting to job history server."); @@ -140,8 +146,8 @@ private MRClientProtocol getProxy() throws YarnRemoteException { if (application.getHost() == null || "".equals(application.getHost())) { LOG.debug("AM not assigned to Job. Waiting to get the AM ..."); Thread.sleep(2000); - - LOG.debug("Application state is " + application.getState()); + + LOG.debug("Application state is " + application.getYarnApplicationState()); application = rm.getApplicationReport(appId); continue; } @@ -151,8 +157,11 @@ private MRClientProtocol getProxy() throws YarnRemoteException { Token clientToken = new Token(); clientToken.decodeFromUrlString(clientTokenEncoded); - clientToken.setService(new Text(application.getHost() + ":" - + application.getRpcPort())); + // RPC layer client expects ip:port as service for tokens + InetSocketAddress addr = NetUtils.createSocketAddr(application + .getHost(), application.getRpcPort()); + clientToken.setService(new Text(addr.getAddress().getHostAddress() + + ":" + addr.getPort())); UserGroupInformation.getCurrentUser().addToken(clientToken); } LOG.info("Tracking Url of JOB is " + application.getTrackingUrl()); @@ -163,7 +172,7 @@ private MRClientProtocol getProxy() throws YarnRemoteException { //possibly the AM has crashed //there may be some time before AM is restarted //keep retrying by getting the address from RM - LOG.info("Could not connect to " + serviceAddr + + LOG.info("Could not connect to " + serviceAddr + ". Waiting for getting the latest AM address..."); try { Thread.sleep(2000); @@ -184,35 +193,36 @@ private MRClientProtocol getProxy() throws YarnRemoteException { } /** we just want to return if its allocating, so that we don't - * block on it. This is to be able to return job status + * block on it. This is to be able to return job status * on an allocating Application. */ - + String user = application.getUser(); if (user == null) { throw RPCUtil.getRemoteException("User is not set in the application report"); } - if (application.getState() == ApplicationState.NEW || - application.getState() == ApplicationState.SUBMITTED) { + if (application.getYarnApplicationState() == YarnApplicationState.NEW || + application.getYarnApplicationState() == YarnApplicationState.SUBMITTED) { realProxy = null; return getNotRunningJob(application, JobState.NEW); } - - if (application.getState() == ApplicationState.FAILED) { + + if (application.getYarnApplicationState() == YarnApplicationState.FAILED) { realProxy = null; return getNotRunningJob(application, JobState.FAILED); } - - if (application.getState() == ApplicationState.KILLED) { + + if (application.getYarnApplicationState() == YarnApplicationState.KILLED) { realProxy = null; return getNotRunningJob(application, JobState.KILLED); } - - //History server can serve a job only if application + + //History server can serve a job only if application //succeeded. - if (application.getState() == ApplicationState.SUCCEEDED) { - LOG.info("Application state is completed. 
" + - "Redirecting to job history server"); + if (application.getYarnApplicationState() == YarnApplicationState.FINISHED) { + LOG.info("Application state is completed. FinalApplicationStatus=" + + application.getFinalApplicationStatus().toString() + + ". Redirecting to job history server"); realProxy = checkAndGetHSProxy(application, JobState.SUCCEEDED); } return realProxy; @@ -233,19 +243,15 @@ private void instantiateAMProxy(final String serviceAddr) throws IOException { realProxy = currentUser.doAs(new PrivilegedAction() { @Override public MRClientProtocol run() { - Configuration myConf = new Configuration(conf); - myConf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - SchedulerSecurityInfo.class, SecurityInfo.class); - YarnRPC rpc = YarnRPC.create(myConf); + YarnRPC rpc = YarnRPC.create(conf); return (MRClientProtocol) rpc.getProxy(MRClientProtocol.class, - NetUtils.createSocketAddr(serviceAddr), myConf); + NetUtils.createSocketAddr(serviceAddr), conf); } }); LOG.trace("Connected to ApplicationMaster at: " + serviceAddr); } - private synchronized Object invoke(String method, Class argClass, + private synchronized Object invoke(String method, Class argClass, Object args) throws YarnRemoteException { Method methodOb = null; try { @@ -284,10 +290,10 @@ public org.apache.hadoop.mapreduce.Counters getJobCounters(JobID arg0) throws IO org.apache.hadoop.mapreduce.v2.api.records.JobId jobID = TypeConverter.toYarn(arg0); GetCountersRequest request = recordFactory.newRecordInstance(GetCountersRequest.class); request.setJobId(jobID); - Counters cnt = ((GetCountersResponse) + Counters cnt = ((GetCountersResponse) invoke("getCounters", GetCountersRequest.class, request)).getCounters(); return TypeConverter.fromYarn(cnt); - + } public TaskCompletionEvent[] getTaskCompletionEvents(JobID arg0, int arg1, int arg2) @@ -299,7 +305,7 @@ public TaskCompletionEvent[] getTaskCompletionEvents(JobID arg0, int arg1, int a request.setJobId(jobID); request.setFromEventId(arg1); request.setMaxEvents(arg2); - List list = + List list = ((GetTaskAttemptCompletionEventsResponse) invoke( "getTaskAttemptCompletionEvents", GetTaskAttemptCompletionEventsRequest.class, request)). getCompletionEventList(); @@ -327,38 +333,43 @@ public String[] getTaskDiagnostics(org.apache.hadoop.mapreduce.TaskAttemptID arg } public JobStatus getJobStatus(JobID oldJobID) throws YarnRemoteException { - org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = + org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter.toYarn(oldJobID); - GetJobReportRequest request = + GetJobReportRequest request = recordFactory.newRecordInstance(GetJobReportRequest.class); request.setJobId(jobId); - JobReport report = ((GetJobReportResponse) invoke("getJobReport", + JobReport report = ((GetJobReportResponse) invoke("getJobReport", GetJobReportRequest.class, request)).getJobReport(); - String jobFile = MRApps.getJobFile(conf, report.getUser(), oldJobID); - - return TypeConverter.fromYarn(report, jobFile); + if (StringUtils.isEmpty(report.getJobFile())) { + String jobFile = MRApps.getJobFile(conf, report.getUser(), oldJobID); + report.setJobFile(jobFile); + } + String historyTrackingUrl = report.getTrackingUrl(); + return TypeConverter.fromYarn(report, "http://" + + (StringUtils.isNotEmpty(historyTrackingUrl) ? 
historyTrackingUrl + : trackingUrl)); } public org.apache.hadoop.mapreduce.TaskReport[] getTaskReports(JobID oldJobID, TaskType taskType) throws YarnRemoteException, YarnRemoteException { - org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = + org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter.toYarn(oldJobID); - GetTaskReportsRequest request = + GetTaskReportsRequest request = recordFactory.newRecordInstance(GetTaskReportsRequest.class); request.setJobId(jobId); request.setTaskType(TypeConverter.toYarn(taskType)); - - List taskReports = - ((GetTaskReportsResponse) invoke("getTaskReports", GetTaskReportsRequest.class, + + List taskReports = + ((GetTaskReportsResponse) invoke("getTaskReports", GetTaskReportsRequest.class, request)).getTaskReportList(); - + return TypeConverter.fromYarn (taskReports).toArray(new org.apache.hadoop.mapreduce.TaskReport[0]); } public boolean killTask(TaskAttemptID taskAttemptID, boolean fail) throws YarnRemoteException { - org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID + org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = TypeConverter.toYarn(taskAttemptID); if (fail) { FailTaskAttemptRequest failRequest = recordFactory.newRecordInstance(FailTaskAttemptRequest.class); @@ -371,10 +382,10 @@ public boolean killTask(TaskAttemptID taskAttemptID, boolean fail) } return true; } - + public boolean killJob(JobID oldJobID) throws YarnRemoteException { - org.apache.hadoop.mapreduce.v2.api.records.JobId jobId + org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter.toYarn(oldJobID); KillJobRequest killRequest = recordFactory.newRecordInstance(KillJobRequest.class); killRequest.setJobId(jobId); @@ -382,5 +393,5 @@ public boolean killJob(JobID oldJobID) return true; } - + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java index 17ad9f62aa..4b2d25676d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java @@ -22,8 +22,6 @@ import java.util.HashMap; import org.apache.commons.lang.NotImplementedException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.FailTaskAttemptRequest; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.FailTaskAttemptResponse; @@ -55,40 +53,36 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskReport; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.util.BuilderUtils; public class 
NotRunningJob implements MRClientProtocol { - private static final Log LOG = LogFactory.getLog(NotRunningJob.class); - - private RecordFactory recordFactory = + private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); - + private final JobState jobState; private final ApplicationReport applicationReport; - - + + private ApplicationReport getUnknownApplicationReport() { - ApplicationReport unknown = - recordFactory.newRecordInstance(ApplicationReport.class); - unknown.setUser("N/A"); - unknown.setHost("N/A"); - unknown.setName("N/A"); - unknown.setQueue("N/A"); - unknown.setStartTime(0); - unknown.setFinishTime(0); - unknown.setTrackingUrl("N/A"); - unknown.setDiagnostics("N/A"); - LOG.info("getUnknownApplicationReport"); - return unknown; + ApplicationId unknownAppId = recordFactory.newRecordInstance(ApplicationId.class); + + // Setting AppState to NEW and finalStatus to UNDEFINED as they are never used + // for a non running job + return BuilderUtils.newApplicationReport(unknownAppId, "N/A", "N/A", "N/A", "N/A", 0, "", + YarnApplicationState.NEW, "N/A", "N/A", 0, 0, FinalApplicationStatus.UNDEFINED); } - + NotRunningJob(ApplicationReport applicationReport, JobState jobState) { - this.applicationReport = - (applicationReport == null) ? + this.applicationReport = + (applicationReport == null) ? getUnknownApplicationReport() : applicationReport; this.jobState = jobState; } @@ -96,7 +90,7 @@ private ApplicationReport getUnknownApplicationReport() { @Override public FailTaskAttemptResponse failTaskAttempt( FailTaskAttemptRequest request) throws YarnRemoteException { - FailTaskAttemptResponse resp = + FailTaskAttemptResponse resp = recordFactory.newRecordInstance(FailTaskAttemptResponse.class); return resp; } @@ -104,7 +98,7 @@ public FailTaskAttemptResponse failTaskAttempt( @Override public GetCountersResponse getCounters(GetCountersRequest request) throws YarnRemoteException { - GetCountersResponse resp = + GetCountersResponse resp = recordFactory.newRecordInstance(GetCountersResponse.class); Counters counters = recordFactory.newRecordInstance(Counters.class); counters.addAllCounterGroups(new HashMap()); @@ -115,7 +109,7 @@ public GetCountersResponse getCounters(GetCountersRequest request) @Override public GetDiagnosticsResponse getDiagnostics(GetDiagnosticsRequest request) throws YarnRemoteException { - GetDiagnosticsResponse resp = + GetDiagnosticsResponse resp = recordFactory.newRecordInstance(GetDiagnosticsResponse.class); resp.addDiagnostics(""); return resp; @@ -135,7 +129,7 @@ public GetJobReportResponse getJobReport(GetJobReportRequest request) jobReport.setTrackingUrl(applicationReport.getTrackingUrl()); jobReport.setFinishTime(applicationReport.getFinishTime()); - GetJobReportResponse resp = + GetJobReportResponse resp = recordFactory.newRecordInstance(GetJobReportResponse.class); resp.setJobReport(jobReport); return resp; @@ -145,7 +139,7 @@ public GetJobReportResponse getJobReport(GetJobReportRequest request) public GetTaskAttemptCompletionEventsResponse getTaskAttemptCompletionEvents( GetTaskAttemptCompletionEventsRequest request) throws YarnRemoteException { - GetTaskAttemptCompletionEventsResponse resp = + GetTaskAttemptCompletionEventsResponse resp = recordFactory.newRecordInstance(GetTaskAttemptCompletionEventsResponse.class); resp.addAllCompletionEvents(new ArrayList()); return resp; @@ -161,7 +155,7 @@ public GetTaskAttemptReportResponse getTaskAttemptReport( @Override public GetTaskReportResponse getTaskReport(GetTaskReportRequest request) throws 
YarnRemoteException { - GetTaskReportResponse resp = + GetTaskReportResponse resp = recordFactory.newRecordInstance(GetTaskReportResponse.class); TaskReport report = recordFactory.newRecordInstance(TaskReport.class); report.setTaskId(request.getTaskId()); @@ -176,7 +170,7 @@ public GetTaskReportResponse getTaskReport(GetTaskReportRequest request) @Override public GetTaskReportsResponse getTaskReports(GetTaskReportsRequest request) throws YarnRemoteException { - GetTaskReportsResponse resp = + GetTaskReportsResponse resp = recordFactory.newRecordInstance(GetTaskReportsResponse.class); resp.addAllTaskReports(new ArrayList()); return resp; @@ -185,7 +179,7 @@ public GetTaskReportsResponse getTaskReports(GetTaskReportsRequest request) @Override public KillJobResponse killJob(KillJobRequest request) throws YarnRemoteException { - KillJobResponse resp = + KillJobResponse resp = recordFactory.newRecordInstance(KillJobResponse.class); return resp; } @@ -193,7 +187,7 @@ public KillJobResponse killJob(KillJobRequest request) @Override public KillTaskResponse killTask(KillTaskRequest request) throws YarnRemoteException { - KillTaskResponse resp = + KillTaskResponse resp = recordFactory.newRecordInstance(KillTaskResponse.class); return resp; } @@ -201,9 +195,9 @@ public KillTaskResponse killTask(KillTaskRequest request) @Override public KillTaskAttemptResponse killTaskAttempt( KillTaskAttemptRequest request) throws YarnRemoteException { - KillTaskAttemptResponse resp = + KillTaskAttemptResponse resp = recordFactory.newRecordInstance(KillTaskAttemptResponse.class); return resp; } - + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java index 8b7c818b1e..5bc171141d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java @@ -91,13 +91,9 @@ public ResourceMgrDelegate(YarnConfiguration conf) { YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS)); LOG.info("Connecting to ResourceManager at " + rmAddress); - Configuration appsManagerServerConf = new Configuration(this.conf); - appsManagerServerConf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - ClientRMSecurityInfo.class, SecurityInfo.class); applicationsManager = (ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, - rmAddress, appsManagerServerConf); + rmAddress, this.conf); LOG.info("Connected to ResourceManager at " + rmAddress); } @@ -262,7 +258,7 @@ public String getStagingAreaDir() throws IOException, InterruptedException { String user = UserGroupInformation.getCurrentUser().getShortUserName(); Path path = MRApps.getStagingAreaDir(conf, user); - LOG.info("DEBUG --- getStagingAreaDir: dir=" + path); + LOG.debug("getStagingAreaDir: dir=" + path); return path.toString(); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index a11968a16f..aceb02378b 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -62,7 +62,6 @@ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.LocalResource; @@ -70,6 +69,7 @@ import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -99,7 +99,7 @@ public YARNRunner(Configuration conf) { } /** - * Similar to {@link #YARNRunner(Configuration)} but allowing injecting + * Similar to {@link #YARNRunner(Configuration)} but allowing injecting * {@link ResourceMgrDelegate}. Enables mocking and testing. * @param conf the configuration object for the client * @param resMgrDelegate the resourcemanager client handle. @@ -107,12 +107,12 @@ public YARNRunner(Configuration conf) { public YARNRunner(Configuration conf, ResourceMgrDelegate resMgrDelegate) { this(conf, resMgrDelegate, new ClientCache(conf, resMgrDelegate)); } - + /** - * Similar to {@link YARNRunner#YARNRunner(Configuration, ResourceMgrDelegate)} + * Similar to {@link YARNRunner#YARNRunner(Configuration, ResourceMgrDelegate)} * but allowing injecting {@link ClientCache}. Enable mocking and testing. * @param conf the configuration object - * @param resMgrDelegate the resource manager delegate + * @param resMgrDelegate the resource manager delegate * @param clientCache the client cache object. 
*/ public YARNRunner(Configuration conf, ResourceMgrDelegate resMgrDelegate, @@ -126,7 +126,7 @@ public YARNRunner(Configuration conf, ResourceMgrDelegate resMgrDelegate, throw new RuntimeException("Error in instantiating YarnClient", ufe); } } - + @Override public void cancelDelegationToken(Token arg0) throws IOException, InterruptedException { @@ -152,7 +152,7 @@ public TaskTrackerInfo[] getBlacklistedTrackers() throws IOException, @Override public ClusterMetrics getClusterMetrics() throws IOException, - InterruptedException { + InterruptedException { return resMgrDelegate.getClusterMetrics(); } @@ -209,13 +209,13 @@ public String getStagingAreaDir() throws IOException, InterruptedException { public String getSystemDir() throws IOException, InterruptedException { return resMgrDelegate.getSystemDir(); } - + @Override public long getTaskTrackerExpiryInterval() throws IOException, InterruptedException { return resMgrDelegate.getTaskTrackerExpiryInterval(); } - + @Override public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts) throws IOException, InterruptedException { @@ -230,20 +230,20 @@ public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts) } // Construct necessary information to start the MR AM - ApplicationSubmissionContext appContext = + ApplicationSubmissionContext appContext = createApplicationSubmissionContext(conf, jobSubmitDir, ts); - + // Submit to ResourceManager ApplicationId applicationId = resMgrDelegate.submitApplication(appContext); - + ApplicationReport appMaster = resMgrDelegate .getApplicationReport(applicationId); - String diagnostics = - (appMaster == null ? + String diagnostics = + (appMaster == null ? "application report is null" : appMaster.getDiagnostics()); - if (appMaster == null || appMaster.getState() == ApplicationState.FAILED - || appMaster.getState() == ApplicationState.KILLED) { - throw new IOException("Failed to run job : " + + if (appMaster == null || appMaster.getYarnApplicationState() == YarnApplicationState.FAILED + || appMaster.getYarnApplicationState() == YarnApplicationState.KILLED) { + throw new IOException("Failed to run job : " + diagnostics); } return clientCache.getClient(jobId).getJobStatus(jobId); @@ -266,7 +266,7 @@ public ApplicationSubmissionContext createApplicationSubmissionContext( Configuration jobConf, String jobSubmitDir, Credentials ts) throws IOException { ApplicationId applicationId = resMgrDelegate.getApplicationId(); - + // Setup resource requirements Resource capability = recordFactory.newRecordInstance(Resource.class); capability.setMemory(conf.getInt(MRJobConfig.MR_AM_VMEM_MB, @@ -276,9 +276,9 @@ public ApplicationSubmissionContext createApplicationSubmissionContext( // Setup LocalResources Map localResources = new HashMap(); - + Path jobConfPath = new Path(jobSubmitDir, MRJobConfig.JOB_CONF_FILE); - + URL yarnUrlForJobSubmitDir = ConverterUtils .getYarnUrlFromPath(defaultFileContext.getDefaultFileSystem() .resolvePath( @@ -299,18 +299,18 @@ public ApplicationSubmissionContext createApplicationSubmissionContext( LOG.info("Job jar is not present. 
" + "Not adding any jar to the list of resources."); } - + // TODO gross hack - for (String s : new String[] { - MRJobConfig.JOB_SPLIT, + for (String s : new String[] { + MRJobConfig.JOB_SPLIT, MRJobConfig.JOB_SPLIT_METAINFO, MRJobConfig.APPLICATION_TOKENS_FILE }) { localResources.put( MRJobConfig.JOB_SUBMIT_DIR + "/" + s, - createApplicationResource(defaultFileContext, + createApplicationResource(defaultFileContext, new Path(jobSubmitDir, s))); } - + // Setup security tokens ByteBuffer securityTokens = null; if (UserGroupInformation.isSecurityEnabled()) { @@ -322,20 +322,20 @@ public ApplicationSubmissionContext createApplicationSubmissionContext( // Setup the command to run the AM Vector vargs = new Vector(8); vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); - + long logSize = TaskLog.getTaskLogLength(new JobConf(conf)); vargs.add("-Dlog4j.configuration=container-log4j.properties"); vargs.add("-D" + MRJobConfig.TASK_LOG_DIR + "=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR); vargs.add("-D" + MRJobConfig.TASK_LOG_SIZE + "=" + logSize); - + vargs.add(conf.get(MRJobConfig.MR_AM_COMMAND_OPTS, MRJobConfig.DEFAULT_MR_AM_COMMAND_OPTS)); vargs.add(MRJobConfig.APPLICATION_MASTER_CLASS); - vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + + vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + Path.SEPARATOR + ApplicationConstants.STDOUT); - vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + + vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + Path.SEPARATOR + ApplicationConstants.STDERR); @@ -349,12 +349,12 @@ public ApplicationSubmissionContext createApplicationSubmissionContext( LOG.info("Command to launch container for ApplicationMaster is : " + mergedCommand); - - // Setup the CLASSPATH in environment + + // Setup the CLASSPATH in environment // i.e. add { job jar, CWD, Hadoop jars} to classpath. 
Map environment = new HashMap(); MRApps.setClasspath(environment); - + // Parse distributed cache MRApps.setupDistributedCache(jobConf, localResources); @@ -374,12 +374,12 @@ public ApplicationSubmissionContext createApplicationSubmissionContext( appContext.setUser( // User name UserGroupInformation.getCurrentUser().getShortUserName()); appContext.setQueue( // Queue name - jobConf.get(JobContext.QUEUE_NAME, + jobConf.get(JobContext.QUEUE_NAME, YarnConfiguration.DEFAULT_QUEUE_NAME)); appContext.setApplicationName( // Job name - jobConf.get(JobContext.JOB_NAME, - YarnConfiguration.DEFAULT_APPLICATION_NAME)); - appContext.setAMContainerSpec(amContainer); // AM Container + jobConf.get(JobContext.JOB_NAME, + YarnConfiguration.DEFAULT_APPLICATION_NAME)); + appContext.setAMContainerSpec(amContainer); // AM Container return appContext; } @@ -394,14 +394,14 @@ public void setJobPriority(JobID arg0, String arg1) throws IOException, public long getProtocolVersion(String arg0, long arg1) throws IOException { return resMgrDelegate.getProtocolVersion(arg0, arg1); } - + @Override public long renewDelegationToken(Token arg0) throws IOException, InterruptedException { return resMgrDelegate.renewDelegationToken(arg0); } - + @Override public Counters getJobCounters(JobID arg0) throws IOException, InterruptedException { @@ -419,7 +419,7 @@ public JobStatus getJobStatus(JobID jobID) throws IOException, JobStatus status = clientCache.getClient(jobID).getJobStatus(jobID); return status; } - + @Override public TaskCompletionEvent[] getTaskCompletionEvents(JobID arg0, int arg1, int arg2) throws IOException, InterruptedException { @@ -446,8 +446,8 @@ public void killJob(JobID arg0) throws IOException, InterruptedException { if (status.getState() != JobStatus.State.RUNNING) { resMgrDelegate.killApplication(TypeConverter.toYarn(arg0).getAppId()); return; - } - + } + try { /* send a kill to the AM */ clientCache.getClient(arg0).killJob(arg0); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java index d90e721694..5d839252ea 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java @@ -88,7 +88,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -107,17 +108,17 @@ public class TestClientRedirect { private static final Log LOG = LogFactory.getLog(TestClientRedirect.class); private static final String RMADDRESS = "0.0.0.0:8054"; private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); - + private static final String AMHOSTADDRESS = "0.0.0.0:10020"; private static final String 
HSHOSTADDRESS = "0.0.0.0:10021"; - private volatile boolean amContact = false; + private volatile boolean amContact = false; private volatile boolean hsContact = false; private volatile boolean amRunning = false; private volatile boolean amRestarting = false; @Test public void testRedirect() throws Exception { - + Configuration conf = new YarnConfiguration(); conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME); conf.set(YarnConfiguration.RM_ADDRESS, RMADDRESS); @@ -125,7 +126,7 @@ public void testRedirect() throws Exception { RMService rmService = new RMService("test"); rmService.init(conf); rmService.start(); - + AMService amService = new AMService(); amService.init(conf); amService.start(conf); @@ -134,16 +135,16 @@ public void testRedirect() throws Exception { HistoryService historyService = new HistoryService(); historyService.init(conf); historyService.start(conf); - + LOG.info("services started"); Cluster cluster = new Cluster(conf); org.apache.hadoop.mapreduce.JobID jobID = new org.apache.hadoop.mapred.JobID("201103121733", 1); - org.apache.hadoop.mapreduce.Counters counters = + org.apache.hadoop.mapreduce.Counters counters = cluster.getJob(jobID).getCounters(); validateCounters(counters); Assert.assertTrue(amContact); - + LOG.info("Sleeping for 5 seconds before stop for" + " the client socket to not get EOF immediately.."); Thread.sleep(5000); @@ -155,17 +156,17 @@ public void testRedirect() throws Exception { LOG.info("Sleeping for 5 seconds after stop for" + " the server to exit cleanly.."); Thread.sleep(5000); - + amRestarting = true; // Same client //results are returned from fake (not started job) counters = cluster.getJob(jobID).getCounters(); Assert.assertEquals(0, counters.countCounters()); Job job = cluster.getJob(jobID); - org.apache.hadoop.mapreduce.TaskID taskId = + org.apache.hadoop.mapreduce.TaskID taskId = new org.apache.hadoop.mapreduce.TaskID(jobID, TaskType.MAP, 0); TaskAttemptID tId = new TaskAttemptID(taskId, 0); - + //invoke all methods to check that no exception is thrown job.killJob(); job.killTask(tId); @@ -175,25 +176,25 @@ public void testRedirect() throws Exception { job.getTaskDiagnostics(tId); job.getTaskReports(TaskType.MAP); job.getTrackingURL(); - + amRestarting = false; amService = new AMService(); amService.init(conf); amService.start(conf); amRunning = true; amContact = false; //reset - + counters = cluster.getJob(jobID).getCounters(); validateCounters(counters); Assert.assertTrue(amContact); - + amRunning = false; // Same client counters = cluster.getJob(jobID).getCounters(); validateCounters(counters); Assert.assertTrue(hsContact); - + rmService.stop(); historyService.stop(); } @@ -248,7 +249,7 @@ public void start() { public GetNewApplicationResponse getNewApplication(GetNewApplicationRequest request) throws YarnRemoteException { return null; } - + @Override public GetApplicationReportResponse getApplicationReport( GetApplicationReportRequest request) throws YarnRemoteException { @@ -256,12 +257,14 @@ public GetApplicationReportResponse getApplicationReport( ApplicationReport application = recordFactory .newRecordInstance(ApplicationReport.class); application.setApplicationId(applicationId); + application.setFinalApplicationStatus(FinalApplicationStatus.UNDEFINED); if (amRunning) { - application.setState(ApplicationState.RUNNING); + application.setYarnApplicationState(YarnApplicationState.RUNNING); } else if (amRestarting) { - application.setState(ApplicationState.SUBMITTED); + 
application.setYarnApplicationState(YarnApplicationState.SUBMITTED); } else { - application.setState(ApplicationState.SUCCEEDED); + application.setYarnApplicationState(YarnApplicationState.FINISHED); + application.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED); } String[] split = AMHOSTADDRESS.split(":"); application.setHost(split[0]); @@ -339,7 +342,7 @@ public GetCountersResponse getCounters(GetCountersRequest request) throws YarnRe } } - class AMService extends AbstractService + class AMService extends AbstractService implements MRClientProtocol { private InetSocketAddress bindAddress; private Server server; @@ -347,7 +350,7 @@ class AMService extends AbstractService public AMService() { this(AMHOSTADDRESS); } - + public AMService(String hostAddress) { super("AMService"); this.hostAddress = hostAddress; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java index 5b07d4997d..d04d7e3d99 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java @@ -32,8 +32,9 @@ import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportResponse; import org.apache.hadoop.mapreduce.v2.api.records.JobReport; import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.ipc.RPCUtil; @@ -124,6 +125,26 @@ public void testHistoryServerNotConfigured() throws Exception { Assert.assertEquals(JobStatus.State.SUCCEEDED, jobStatus.getState()); } + + @Test + public void testJobReportFromHistoryServer() throws Exception { + MRClientProtocol historyServerProxy = mock(MRClientProtocol.class); + when(historyServerProxy.getJobReport(getJobReportRequest())).thenReturn( + getJobReportResponseFromHistoryServer()); + ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class); + when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId())) + .thenReturn(null); + ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate( + historyServerProxy, rm); + + JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId); + Assert.assertNotNull(jobStatus); + Assert.assertEquals("TestJobFilePath", jobStatus.getJobFile()); + Assert.assertEquals("http://TestTrackingUrl", jobStatus.getTrackingUrl()); + Assert.assertEquals(1.0f, jobStatus.getMapProgress()); + Assert.assertEquals(1.0f, jobStatus.getReduceProgress()); + } + private GetJobReportRequest getJobReportRequest() { GetJobReportRequest request = Records.newRecord(GetJobReportRequest.class); request.setJobId(jobId); @@ -143,7 +164,7 @@ private GetJobReportResponse getJobReportResponse() { private ApplicationReport getApplicationReport() { ApplicationReport applicationReport = Records .newRecord(ApplicationReport.class); - 
applicationReport.setState(ApplicationState.SUCCEEDED); + applicationReport.setYarnApplicationState(YarnApplicationState.FINISHED); applicationReport.setUser("root"); applicationReport.setHost("N/A"); applicationReport.setName("N/A"); @@ -152,6 +173,7 @@ private ApplicationReport getApplicationReport() { applicationReport.setFinishTime(0); applicationReport.setTrackingUrl("N/A"); applicationReport.setDiagnostics("N/A"); + applicationReport.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED); return applicationReport; } @@ -170,4 +192,17 @@ private ClientServiceDelegate getClientServiceDelegate( return clientServiceDelegate; } + private GetJobReportResponse getJobReportResponseFromHistoryServer() { + GetJobReportResponse jobReportResponse = Records + .newRecord(GetJobReportResponse.class); + JobReport jobReport = Records.newRecord(JobReport.class); + jobReport.setJobId(jobId); + jobReport.setJobState(JobState.SUCCEEDED); + jobReport.setMapProgress(1.0f); + jobReport.setReduceProgress(1.0f); + jobReport.setJobFile("TestJobFilePath"); + jobReport.setTrackingUrl("TestTrackingUrl"); + jobReportResponse.setJobReport(jobReport); + return jobReportResponse; + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java index aa832aa1cc..437ae13571 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java @@ -273,9 +273,9 @@ public void testFailingMapper() throws IOException, InterruptedException, TaskCompletionEvent[] events = job.getTaskCompletionEvents(0, 2); Assert.assertEquals(TaskCompletionEvent.Status.FAILED, - events[0].getStatus().FAILED); - Assert.assertEquals(TaskCompletionEvent.Status.FAILED, - events[1].getStatus().FAILED); + events[0].getStatus()); + Assert.assertEquals(TaskCompletionEvent.Status.TIPFAILED, + events[1].getStatus()); Assert.assertEquals(JobStatus.State.FAILED, job.getJobState()); verifyFailingMapperCounters(job); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobsWithHistoryService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobsWithHistoryService.java index 49f0c174d9..29c41404d5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobsWithHistoryService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobsWithHistoryService.java @@ -36,7 +36,6 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.junit.Before; import org.junit.After; diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java index 346ccd2f0d..bacf164863 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java @@ -64,10 +64,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -77,25 +77,25 @@ import org.mockito.stubbing.Answer; /** - * Test YarnRunner and make sure the client side plugin works + * Test YarnRunner and make sure the client side plugin works * fine */ public class TestYARNRunner extends TestCase { private static final Log LOG = LogFactory.getLog(TestYARNRunner.class); private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); - + private YARNRunner yarnRunner; private ResourceMgrDelegate resourceMgrDelegate; private YarnConfiguration conf; private ClientCache clientCache; private ApplicationId appId; private JobID jobId; - private File testWorkDir = + private File testWorkDir = new File("target", TestYARNRunner.class.getName()); private ApplicationSubmissionContext submissionContext; private ClientServiceDelegate clientDelegate; private static final String failString = "Rejected job"; - + @Before public void setUp() throws Exception { resourceMgrDelegate = mock(ResourceMgrDelegate.class); @@ -115,7 +115,7 @@ public ApplicationSubmissionContext answer(InvocationOnMock invocation) } ).when(yarnRunner).createApplicationSubmissionContext(any(Configuration.class), any(String.class), any(Credentials.class)); - + appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(System.currentTimeMillis()); appId.setId(1); @@ -125,13 +125,13 @@ public ApplicationSubmissionContext answer(InvocationOnMock invocation) } testWorkDir.mkdirs(); } - - + + @Test public void testJobKill() throws Exception { clientDelegate = mock(ClientServiceDelegate.class); - when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new - org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f, + when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new + org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f, State.PREP, JobPriority.HIGH, "tmp", "tmp", "tmp", "tmp")); when(clientDelegate.killJob(any(JobID.class))).thenReturn(true); doAnswer( @@ -145,13 +145,13 @@ public ClientServiceDelegate answer(InvocationOnMock invocation) ).when(clientCache).getClient(any(JobID.class)); yarnRunner.killJob(jobId); verify(resourceMgrDelegate).killApplication(appId); - 
when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new - org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f, + when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new + org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f, State.RUNNING, JobPriority.HIGH, "tmp", "tmp", "tmp", "tmp")); yarnRunner.killJob(jobId); verify(clientDelegate).killJob(jobId); } - + @Test public void testJobSubmissionFailure() throws Exception { when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class))). @@ -159,7 +159,7 @@ public void testJobSubmissionFailure() throws Exception { ApplicationReport report = mock(ApplicationReport.class); when(report.getApplicationId()).thenReturn(appId); when(report.getDiagnostics()).thenReturn(failString); - when(report.getState()).thenReturn(ApplicationState.FAILED); + when(report.getYarnApplicationState()).thenReturn(YarnApplicationState.FAILED); when(resourceMgrDelegate.getApplicationReport(appId)).thenReturn(report); Credentials credentials = new Credentials(); File jobxml = new File(testWorkDir, "job.xml"); @@ -167,13 +167,13 @@ public void testJobSubmissionFailure() throws Exception { conf.writeXml(out); out.close(); try { - yarnRunner.submitJob(jobId, testWorkDir.getAbsolutePath().toString(), credentials); + yarnRunner.submitJob(jobId, testWorkDir.getAbsolutePath().toString(), credentials); } catch(IOException io) { LOG.info("Logging exception:", io); assertTrue(io.getLocalizedMessage().contains(failString)); } } - + @Test public void testResourceMgrDelegate() throws Exception { /* we not want a mock of resourcemgr deleagte */ @@ -184,19 +184,19 @@ public void testResourceMgrDelegate() throws Exception { .thenReturn(null); delegate.killApplication(appId); verify(clientRMProtocol).forceKillApplication(any(KillApplicationRequest.class)); - + /* make sure getalljobs calls get all applications */ when(clientRMProtocol.getAllApplications(any(GetAllApplicationsRequest.class))). thenReturn(recordFactory.newRecordInstance(GetAllApplicationsResponse.class)); delegate.getAllJobs(); verify(clientRMProtocol).getAllApplications(any(GetAllApplicationsRequest.class)); - + /* make sure getapplication report is called */ when(clientRMProtocol.getApplicationReport(any(GetApplicationReportRequest.class))) .thenReturn(recordFactory.newRecordInstance(GetApplicationReportResponse.class)); delegate.getApplicationReport(appId); verify(clientRMProtocol).getApplicationReport(any(GetApplicationReportRequest.class)); - + /* make sure metrics is called */ GetClusterMetricsResponse clusterMetricsResponse = recordFactory.newRecordInstance (GetClusterMetricsResponse.class); @@ -206,7 +206,7 @@ public void testResourceMgrDelegate() throws Exception { .thenReturn(clusterMetricsResponse); delegate.getClusterMetrics(); verify(clientRMProtocol).getClusterMetrics(any(GetClusterMetricsRequest.class)); - + when(clientRMProtocol.getClusterNodes(any(GetClusterNodesRequest.class))). 
thenReturn(recordFactory.newRecordInstance(GetClusterNodesResponse.class)); delegate.getActiveTrackers(); @@ -227,7 +227,7 @@ public void testResourceMgrDelegate() throws Exception { thenReturn(queueInfoResponse); delegate.getQueues(); verify(clientRMProtocol).getQueueInfo(any(GetQueueInfoRequest.class)); - + GetQueueUserAclsInfoResponse aclResponse = recordFactory.newRecordInstance( GetQueueUserAclsInfoResponse.class); when(clientRMProtocol.getQueueUserAcls(any(GetQueueUserAclsInfoRequest.class))) diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn index 31354dc714..6f1861f2c2 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn @@ -47,7 +47,7 @@ # YARN_ROOT_LOGGER The root appender. Default is INFO,console # -bin=`dirname "$0"` +bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` . "$bin"/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh index efe655976a..dfd500f520 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh @@ -36,7 +36,7 @@ if [ $# -le 1 ]; then exit 1 fi -bin=`dirname "$0"` +bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` . "$bin"/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterRequest.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterRequest.java index 36d2af22f9..7a09e77452 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterRequest.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterRequest.java @@ -22,15 +22,16 @@ import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.yarn.api.AMRMProtocol; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; /** - *

<p>The finalization request sent by the <code>ApplicationMaster</code> to 
+ * <p>The finalization request sent by the <code>ApplicationMaster</code> to
 * inform the <code>ResourceManager</code> about its completion.</p>
- * 
+ *
 * <p>The final request includes details such:
 *   <ul>
 *     <li>
- *         {@link ApplicationAttemptId} being managed by the 
+ *         {@link ApplicationAttemptId} being managed by the
 *         <code>ApplicationMaster</code>
 *     </li>
 *     <li>Final state of the <code>ApplicationMaster</code></li>
  • @@ -47,19 +48,19 @@ public interface FinishApplicationMasterRequest { /** - * Get the ApplicationAttemptId being managed by the + * Get the ApplicationAttemptId being managed by the * ApplicationMaster. - * @return ApplicationAttemptId being managed by the + * @return ApplicationAttemptId being managed by the * ApplicationMaster */ @Public @Stable ApplicationAttemptId getApplicationAttemptId(); - + /** - * Set the ApplicationAttemptId being managed by the + * Set the ApplicationAttemptId being managed by the * ApplicationMaster. - * @param applicationAttemptId ApplicationAttemptId being managed + * @param applicationAttemptId ApplicationAttemptId being managed * by the ApplicationMaster */ @Public @@ -72,15 +73,15 @@ public interface FinishApplicationMasterRequest { */ @Public @Stable - String getFinalState(); - + FinalApplicationStatus getFinalApplicationStatus(); + /** - * Set final state of the ApplicationMaster - * @param finalState final state of the ApplicationMaster + * Set the finish state of the ApplicationMaster + * @param finishState finish state of the ApplicationMaster */ @Public @Stable - void setFinalState(String finalState); + void setFinishApplicationStatus(FinalApplicationStatus finishState); /** * Get diagnostic information on application failure. @@ -89,7 +90,7 @@ public interface FinishApplicationMasterRequest { @Public @Stable String getDiagnostics(); - + /** * Set diagnostic information on application failure. * @param diagnostics diagnostic information on application failure @@ -105,10 +106,10 @@ public interface FinishApplicationMasterRequest { @Public @Stable String getTrackingUrl(); - + /** * Set the tracking URLfor the ApplicationMaster - * @param url tracking URLfor the + * @param url tracking URLfor the * ApplicationMaster */ @Public diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterRequestPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterRequestPBImpl.java index 7ec5abe120..01b3738819 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterRequestPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterRequestPBImpl.java @@ -21,23 +21,24 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationAttemptIdPBImpl; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptIdProto; +import org.apache.hadoop.yarn.proto.YarnProtos.FinalApplicationStatusProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationMasterRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationMasterRequestProtoOrBuilder; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterRequestProtoOrBuilder; +import org.apache.hadoop.yarn.util.ProtoUtils; - public class FinishApplicationMasterRequestPBImpl extends ProtoBase implements FinishApplicationMasterRequest { FinishApplicationMasterRequestProto proto = 
FinishApplicationMasterRequestProto.getDefaultInstance(); FinishApplicationMasterRequestProto.Builder builder = null; boolean viaProto = false; - + private ApplicationAttemptId appAttemptId = null; - - + + public FinishApplicationMasterRequestPBImpl() { builder = FinishApplicationMasterRequestProto.newBuilder(); } @@ -46,7 +47,7 @@ public FinishApplicationMasterRequestPBImpl(FinishApplicationMasterRequestProto this.proto = proto; viaProto = true; } - + public FinishApplicationMasterRequestProto getProto() { mergeLocalToProto(); proto = viaProto ? proto : builder.build(); @@ -61,7 +62,7 @@ private void mergeLocalToBuilder() { } private void mergeLocalToProto() { - if (viaProto) + if (viaProto) maybeInitBuilder(); mergeLocalToBuilder(); proto = builder.build(); @@ -74,8 +75,7 @@ private void maybeInitBuilder() { } viaProto = false; } - - + @Override public ApplicationAttemptId getApplicationAttemptId() { FinishApplicationMasterRequestProtoOrBuilder p = viaProto ? proto : builder; @@ -92,7 +92,7 @@ public ApplicationAttemptId getApplicationAttemptId() { @Override public void setAppAttemptId(ApplicationAttemptId applicationAttemptId) { maybeInitBuilder(); - if (applicationAttemptId == null) + if (applicationAttemptId == null) builder.clearApplicationAttemptId(); this.appAttemptId = applicationAttemptId; } @@ -122,15 +122,22 @@ public void setTrackingUrl(String url) { } @Override - public String getFinalState() { + public FinalApplicationStatus getFinalApplicationStatus() { FinishApplicationMasterRequestProtoOrBuilder p = viaProto ? proto : builder; - return p.getFinalState(); + if (!p.hasFinalApplicationStatus()) { + return null; + } + return convertFromProtoFormat(p.getFinalApplicationStatus()); } @Override - public void setFinalState(String state) { + public void setFinishApplicationStatus(FinalApplicationStatus finishState) { maybeInitBuilder(); - builder.setFinalState(state); + if (finishState == null) { + builder.clearFinalApplicationStatus(); + return; + } + builder.setFinalApplicationStatus(convertToProtoFormat(finishState)); } private ApplicationAttemptIdPBImpl convertFromProtoFormat(ApplicationAttemptIdProto p) { @@ -141,6 +148,13 @@ private ApplicationAttemptIdProto convertToProtoFormat(ApplicationAttemptId t) { return ((ApplicationAttemptIdPBImpl)t).getProto(); } + private FinalApplicationStatus convertFromProtoFormat(FinalApplicationStatusProto s) { + return ProtoUtils.convertFromProtoFormat(s); + } + + private FinalApplicationStatusProto convertToProtoFormat(FinalApplicationStatus s) { + return ProtoUtils.convertToProtoFormat(s); + } -} +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java index 3d959116a6..576d1f84a8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java @@ -23,24 +23,22 @@ import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; -import 
org.apache.hadoop.yarn.proto.YarnProtos.QueueInfoProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProtoOrBuilder; - -public class RegisterApplicationMasterResponsePBImpl -extends ProtoBase +public class RegisterApplicationMasterResponsePBImpl +extends ProtoBase implements RegisterApplicationMasterResponse { - RegisterApplicationMasterResponseProto proto = + RegisterApplicationMasterResponseProto proto = RegisterApplicationMasterResponseProto.getDefaultInstance(); RegisterApplicationMasterResponseProto.Builder builder = null; boolean viaProto = false; - + private Resource minimumResourceCapability; private Resource maximumResourceCapability; - + public RegisterApplicationMasterResponsePBImpl() { builder = RegisterApplicationMasterResponseProto.newBuilder(); } @@ -49,16 +47,16 @@ public RegisterApplicationMasterResponsePBImpl(RegisterApplicationMasterResponse this.proto = proto; viaProto = true; } - + public RegisterApplicationMasterResponseProto getProto() { mergeLocalToProto(); proto = viaProto ? proto : builder.build(); viaProto = true; return proto; } - + private void mergeLocalToProto() { - if (viaProto) + if (viaProto) maybeInitBuilder(); mergeLocalToBuilder(); proto = builder.build(); @@ -94,7 +92,7 @@ public Resource getMaximumResourceCapability() { if (!p.hasMaximumCapability()) { return null; } - + this.maximumResourceCapability = convertFromProtoFormat(p.getMaximumCapability()); return this.maximumResourceCapability; } @@ -109,7 +107,7 @@ public Resource getMinimumResourceCapability() { if (!p.hasMinimumCapability()) { return null; } - + this.minimumResourceCapability = convertFromProtoFormat(p.getMinimumCapability()); return this.minimumResourceCapability; } @@ -140,4 +138,4 @@ private ResourceProto convertToProtoFormat(Resource resource) { return ((ResourcePBImpl)resource).getProto(); } -} +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationMaster.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationMaster.java index 3137009f48..c3103cb16e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationMaster.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationMaster.java @@ -22,38 +22,38 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; /** - * For internal use only... + * For internal use only... 
*/ @Private @Unstable public interface ApplicationMaster { ApplicationId getApplicationId(); void setApplicationId(ApplicationId appId); - + String getHost(); void setHost(String host); - + int getRpcPort(); void setRpcPort(int rpcPort); - + String getTrackingUrl(); void setTrackingUrl(String url); - + ApplicationStatus getStatus(); void setStatus(ApplicationStatus status); - - ApplicationState getState(); - void setState(ApplicationState state); - + + YarnApplicationState getState(); + void setState(YarnApplicationState state); + String getClientToken(); void setClientToken(String clientToken); - + int getAMFailCount(); void setAMFailCount(int amFailCount); - + int getContainerCount(); void setContainerCount(int containerCount); - + String getDiagnostics(); void setDiagnostics(String diagnostics); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java index ffb920d5b9..254a87878e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java @@ -36,13 +36,13 @@ *
    <li>Host on which the <code>ApplicationMaster</code>is running.</li>
 *     <li>RPC port of the <code>ApplicationMaster</code>.</li>
 *     <li>Tracking URL.</li>
- *     <li>{@link ApplicationState} of the application.</li>
+ *     <li>{@link YarnApplicationState} of the application.</li>
 *     <li>Diagnostic information in case of errors.</li>
 *     <li>Start time of the application.</li>
 *     <li>Client token of the application (if security is enabled).</li>
 *   </ul>
 * </p>
 * 
- * + * * @see ClientRMProtocol#getApplicationReport(org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest) */ @Public @@ -56,7 +56,7 @@ public interface ApplicationReport { @Public @Stable ApplicationId getApplicationId(); - + @Private @Unstable void setApplicationId(ApplicationId applicationId); @@ -68,7 +68,7 @@ public interface ApplicationReport { @Public @Stable String getUser(); - + @Private @Unstable void setUser(String user); @@ -80,7 +80,7 @@ public interface ApplicationReport { @Public @Stable String getQueue(); - + @Private @Unstable void setQueue(String queue); @@ -92,21 +92,21 @@ public interface ApplicationReport { @Public @Stable String getName(); - + @Private @Unstable void setName(String name); /** - * Get the host on which the ApplicationMaster + * Get the host on which the ApplicationMaster * is running. - * @return host on which the ApplicationMaster + * @return host on which the ApplicationMaster * is running */ @Public @Stable String getHost(); - + @Private @Unstable void setHost(String host); @@ -118,47 +118,47 @@ public interface ApplicationReport { @Public @Stable int getRpcPort(); - + @Private @Unstable void setRpcPort(int rpcPort); /** - * Get the client token for communicating with the + * Get the client token for communicating with the * ApplicationMaster. - * @return client token for communicating with the + * @return client token for communicating with the * ApplicationMaster */ @Public @Stable String getClientToken(); - + @Private @Unstable void setClientToken(String clientToken); /** - * Get the ApplicationState of the application. - * @return ApplicationState of the application + * Get the YarnApplicationState of the application. + * @return YarnApplicationState of the application */ @Public @Stable - ApplicationState getState(); - + YarnApplicationState getYarnApplicationState(); + @Private @Unstable - void setState(ApplicationState state); + void setYarnApplicationState(YarnApplicationState state); /** - * Get the diagnositic information of the application in case of + * Get the diagnositic information of the application in case of * errors. - * @return diagnositic information of the application in case + * @return diagnositic information of the application in case * of errors */ @Public @Stable String getDiagnostics(); - + @Private @Unstable void setDiagnostics(String diagnostics); @@ -170,11 +170,11 @@ public interface ApplicationReport { @Public @Stable String getTrackingUrl(); - + @Private @Unstable void setTrackingUrl(String url); - + /** * Get the start time of the application. * @return start time of the application @@ -182,7 +182,7 @@ public interface ApplicationReport { @Public @Stable long getStartTime(); - + @Private @Unstable void setStartTime(long startTime); @@ -194,8 +194,21 @@ public interface ApplicationReport { @Public @Stable long getFinishTime(); - + @Private @Unstable void setFinishTime(long finishTime); + + + /** + * Get the final finish status of the application. 
+ */ + @Public + @Stable + FinalApplicationStatus getFinalApplicationStatus(); + + @Private + @Unstable + void setFinalApplicationStatus(FinalApplicationStatus finishState); + } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/FinalApplicationStatus.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/FinalApplicationStatus.java new file mode 100644 index 0000000000..170c25ae84 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/FinalApplicationStatus.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Stable; + +/** + * Enumeration of various final states of an Application. + */ +@Public +@Stable +public enum FinalApplicationStatus { + + /** Undefined state when either the application has not yet finished */ + UNDEFINED, + + /** Application which finished successfully. */ + SUCCEEDED, + + /** Application which failed. */ + FAILED, + + /** Application which was terminated by a user or admin. */ + KILLED +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationState.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/YarnApplicationState.java similarity index 84% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationState.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/YarnApplicationState.java index 6fcdea5cac..c45b62d54a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationState.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/YarnApplicationState.java @@ -22,26 +22,26 @@ import org.apache.hadoop.classification.InterfaceStability.Stable; /** - * Ennumeration of various states of an Application. + * Ennumeration of various states of an ApplicationMaster. */ @Public @Stable -public enum ApplicationState { +public enum YarnApplicationState { /** Application which was just created. */ - NEW, - + NEW, + /** Application which has been submitted. */ - SUBMITTED, - + SUBMITTED, + /** Application which is currently running. */ - RUNNING, - - /** Application which completed successfully. */ - SUCCEEDED, - + RUNNING, + + /** Application which finished successfully. 
*/ + FINISHED, + /** Application which failed. */ - FAILED, - + FAILED, + /** Application which was terminated by a user or admin. */ KILLED } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationMasterPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationMasterPBImpl.java index ee98eed5c4..123e178806 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationMasterPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationMasterPBImpl.java @@ -19,31 +19,28 @@ package org.apache.hadoop.yarn.api.records.impl.pb; -import java.util.List; - import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationMaster; -import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationStatus; import org.apache.hadoop.yarn.api.records.ProtoBase; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationMasterProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationMasterProtoOrBuilder; -import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationStatusProto; +import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationStateProto; import org.apache.hadoop.yarn.util.ProtoUtils; - public class ApplicationMasterPBImpl extends ProtoBase implements ApplicationMaster { ApplicationMasterProto proto = ApplicationMasterProto.getDefaultInstance(); ApplicationMasterProto.Builder builder = null; boolean viaProto = false; - + private ApplicationId applicationId = null; private ApplicationStatus applicationStatus = null; - - + + public ApplicationMasterPBImpl() { builder = ApplicationMasterProto.newBuilder(); } @@ -52,9 +49,9 @@ public ApplicationMasterPBImpl(ApplicationMasterProto proto) { this.proto = proto; viaProto = true; } - + public ApplicationMasterProto getProto() { - + mergeLocalToProto(); proto = viaProto ? proto : builder.build(); viaProto = true; @@ -72,24 +69,24 @@ private void mergeLocalToBuilder() { } private void mergeLocalToProto() { - if (viaProto) + if (viaProto) maybeInitBuilder(); mergeLocalToBuilder(); proto = builder.build(); - + viaProto = true; } - + private void maybeInitBuilder() { if (viaProto || builder == null) { builder = ApplicationMasterProto.newBuilder(proto); } viaProto = false; } - - + + @Override - public ApplicationState getState() { + public YarnApplicationState getState() { ApplicationMasterProtoOrBuilder p = viaProto ? proto : builder; if (!p.hasState()) { return null; @@ -98,7 +95,7 @@ public ApplicationState getState() { } @Override - public void setState(ApplicationState state) { + public void setState(YarnApplicationState state) { maybeInitBuilder(); if (state == null) { builder.clearState(); @@ -124,7 +121,7 @@ public void setHost(String host) { } builder.setHost((host)); } - + @Override public ApplicationId getApplicationId() { ApplicationMasterProtoOrBuilder p = viaProto ? 
proto : builder; @@ -135,7 +132,7 @@ public ApplicationId getApplicationId() { return null; } applicationId = convertFromProtoFormat(p.getApplicationId()); - + return applicationId; } @@ -145,7 +142,7 @@ public void setApplicationId(ApplicationId applicationId) { if (applicationId == null) builder.clearApplicationId(); this.applicationId = applicationId; - + } @Override public int getRpcPort() { @@ -179,7 +176,7 @@ public ApplicationStatus getStatus() { return null; } this.applicationStatus = convertFromProtoFormat(p.getStatus()); - + return this.applicationStatus; } @@ -189,7 +186,7 @@ public void setStatus(ApplicationStatus status) { if (status == null) builder.clearStatus(); this.applicationStatus = status; - + } @Override public String getClientToken() { @@ -209,7 +206,7 @@ public void setClientToken(String clientToken) { } builder.setClientToken((clientToken)); } - + @Override public int getAMFailCount() { ApplicationMasterProtoOrBuilder p = viaProto ? proto : builder; @@ -250,11 +247,11 @@ public void setDiagnostics(String diagnostics) { builder.setDiagnostics(diagnostics); } - private ApplicationStateProto convertToProtoFormat(ApplicationState e) { + private YarnApplicationStateProto convertToProtoFormat(YarnApplicationState e) { return ProtoUtils.convertToProtoFormat(e); } - private ApplicationState convertFromProtoFormat(ApplicationStateProto e) { + private YarnApplicationState convertFromProtoFormat(YarnApplicationStateProto e) { return ProtoUtils.convertFromProtoFormat(e); } @@ -274,4 +271,4 @@ private ApplicationStatusProto convertToProtoFormat(ApplicationStatus t) { return ((ApplicationStatusPBImpl)t).getProto(); } -} +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java index 2ea2ddbcdb..bdb2f8d6a2 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java @@ -18,17 +18,19 @@ package org.apache.hadoop.yarn.api.records.impl.pb; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProtoOrBuilder; -import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationStateProto; +import org.apache.hadoop.yarn.proto.YarnProtos.FinalApplicationStatusProto; +import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationStateProto; import org.apache.hadoop.yarn.util.ProtoUtils; -public class ApplicationReportPBImpl extends ProtoBase +public class ApplicationReportPBImpl extends ProtoBase implements ApplicationReport { ApplicationReportProto proto = ApplicationReportProto.getDefaultInstance(); ApplicationReportProto.Builder builder = null; @@ -39,7 +41,7 @@ public class ApplicationReportPBImpl extends ProtoBase public 
ApplicationReportPBImpl() { builder = ApplicationReportProto.newBuilder(); } - + public ApplicationReportPBImpl(ApplicationReportProto proto) { this.proto = proto; viaProto = true; @@ -87,12 +89,12 @@ public String getQueue() { } @Override - public ApplicationState getState() { + public YarnApplicationState getYarnApplicationState() { ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; - if (!p.hasState()) { + if (!p.hasYarnApplicationState()) { return null; } - return convertFromProtoFormat(p.getState()); + return convertFromProtoFormat(p.getYarnApplicationState()); } @Override @@ -138,6 +140,27 @@ public String getDiagnostics() { return p.getDiagnostics(); } + @Override + public long getStartTime() { + ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; + return p.getStartTime(); + } + + @Override + public long getFinishTime() { + ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; + return p.getFinishTime(); + } + + @Override + public FinalApplicationStatus getFinalApplicationStatus() { + ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasFinalApplicationStatus()) { + return null; + } + return convertFromProtoFormat(p.getFinalApplicationStatus()); + } + @Override public void setApplicationId(ApplicationId applicationId) { maybeInitBuilder(); @@ -177,13 +200,13 @@ public void setQueue(String queue) { } @Override - public void setState(ApplicationState state) { + public void setYarnApplicationState(YarnApplicationState state) { maybeInitBuilder(); if (state == null) { - builder.clearState(); + builder.clearYarnApplicationState(); return; } - builder.setState(convertToProtoFormat(state)); + builder.setYarnApplicationState(convertToProtoFormat(state)); } @Override @@ -232,38 +255,36 @@ public void setDiagnostics(String diagnostics) { builder.setDiagnostics(diagnostics); } - @Override - public ApplicationReportProto getProto() { - mergeLocalToProto(); - proto = viaProto ? proto : builder.build(); - viaProto = true; - return proto; - } - - @Override - public long getStartTime() { - ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; - return p.getStartTime(); - } - @Override public void setStartTime(long startTime) { maybeInitBuilder(); builder.setStartTime(startTime); } - @Override - public long getFinishTime() { - ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; - return p.getFinishTime(); - } - @Override public void setFinishTime(long finishTime) { maybeInitBuilder(); builder.setFinishTime(finishTime); } + @Override + public void setFinalApplicationStatus(FinalApplicationStatus finishState) { + maybeInitBuilder(); + if (finishState == null) { + builder.clearFinalApplicationStatus(); + return; + } + builder.setFinalApplicationStatus(convertToProtoFormat(finishState)); + } + + @Override + public ApplicationReportProto getProto() { + mergeLocalToProto(); + proto = viaProto ? 
proto : builder.build(); + viaProto = true; + return proto; + } + private void mergeLocalToBuilder() { if (this.applicationId != null && !((ApplicationIdPBImpl) this.applicationId).getProto().equals( @@ -291,16 +312,25 @@ private ApplicationIdProto convertToProtoFormat(ApplicationId t) { return ((ApplicationIdPBImpl) t).getProto(); } - private ApplicationState convertFromProtoFormat(ApplicationStateProto s) { - return ProtoUtils.convertFromProtoFormat(s); - } - - private ApplicationStateProto convertToProtoFormat(ApplicationState s) { - return ProtoUtils.convertToProtoFormat(s); - } - private ApplicationIdPBImpl convertFromProtoFormat( ApplicationIdProto applicationId) { return new ApplicationIdPBImpl(applicationId); } + + private YarnApplicationState convertFromProtoFormat(YarnApplicationStateProto s) { + return ProtoUtils.convertFromProtoFormat(s); + } + + private YarnApplicationStateProto convertToProtoFormat(YarnApplicationState s) { + return ProtoUtils.convertToProtoFormat(s); + } + + private FinalApplicationStatus convertFromProtoFormat(FinalApplicationStatusProto s) { + return ProtoUtils.convertFromProtoFormat(s); + } + + private FinalApplicationStatusProto convertToProtoFormat(FinalApplicationStatus s) { + return ProtoUtils.convertToProtoFormat(s); + } + } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/ProtoUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/ProtoUtils.java index 1451847111..927764b063 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/ProtoUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/ProtoUtils.java @@ -20,24 +20,26 @@ import java.nio.ByteBuffer; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueState; -import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationStateProto; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStateProto; +import org.apache.hadoop.yarn.proto.YarnProtos.FinalApplicationStatusProto; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceTypeProto; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceVisibilityProto; import org.apache.hadoop.yarn.proto.YarnProtos.QueueACLProto; import org.apache.hadoop.yarn.proto.YarnProtos.QueueStateProto; +import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationStateProto; import com.google.protobuf.ByteString; public class ProtoUtils { - - + + /* * ContainerState */ @@ -48,18 +50,29 @@ public static ContainerStateProto convertToProtoFormat(ContainerState e) { public static ContainerState convertFromProtoFormat(ContainerStateProto e) { return ContainerState.valueOf(e.name().replace(CONTAINER_STATE_PREFIX, "")); } - + /* - * ApplicationState + * YarnApplicationState */ - public static ApplicationStateProto convertToProtoFormat(ApplicationState e) { - return ApplicationStateProto.valueOf(e.name()); + public static YarnApplicationStateProto convertToProtoFormat(YarnApplicationState e) { + return 
YarnApplicationStateProto.valueOf(e.name()); } - public static ApplicationState convertFromProtoFormat(ApplicationStateProto e) { - return ApplicationState.valueOf(e.name()); + public static YarnApplicationState convertFromProtoFormat(YarnApplicationStateProto e) { + return YarnApplicationState.valueOf(e.name()); } - + + /* + * FinalApplicationStatus + */ + private static String FINAL_APPLICATION_STATUS_PREFIX = "APP_"; + public static FinalApplicationStatusProto convertToProtoFormat(FinalApplicationStatus e) { + return FinalApplicationStatusProto.valueOf(FINAL_APPLICATION_STATUS_PREFIX + e.name()); + } + public static FinalApplicationStatus convertFromProtoFormat(FinalApplicationStatusProto e) { + return FinalApplicationStatus.valueOf(e.name().replace(FINAL_APPLICATION_STATUS_PREFIX, "")); + } + /* * LocalResourceType */ @@ -69,7 +82,7 @@ public static LocalResourceTypeProto convertToProtoFormat(LocalResourceType e) { public static LocalResourceType convertFromProtoFormat(LocalResourceTypeProto e) { return LocalResourceType.valueOf(e.name()); } - + /* * LocalResourceVisibility */ @@ -79,7 +92,7 @@ public static LocalResourceVisibilityProto convertToProtoFormat(LocalResourceVis public static LocalResourceVisibility convertFromProtoFormat(LocalResourceVisibilityProto e) { return LocalResourceVisibility.valueOf(e.name()); } - + /* * ByteBuffer */ @@ -98,7 +111,7 @@ public static ByteString convertToProtoFormat(ByteBuffer byteBuffer) { byteBuffer.position(oldPos); return bs; } - + /* * QueueState */ @@ -109,7 +122,7 @@ public static QueueStateProto convertToProtoFormat(QueueState e) { public static QueueState convertFromProtoFormat(QueueStateProto e) { return QueueState.valueOf(e.name().replace(QUEUE_STATE_PREFIX, "")); } - + /* * QueueACL */ diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 704c710996..14bfb11496 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -76,14 +76,20 @@ message ContainerProto { optional ContainerStatusProto container_status = 8; } -enum ApplicationStateProto { +enum YarnApplicationStateProto { NEW = 1; SUBMITTED = 2; RUNNING = 3; - RESTARTING = 4; - SUCCEEDED = 5; - FAILED = 6; - KILLED = 7; + FINISHED = 4; + FAILED = 5; + KILLED = 6; +} + +enum FinalApplicationStatusProto { + APP_UNDEFINED = 0; + APP_SUCCEEDED = 1; + APP_FAILED = 2; + APP_KILLED = 3; } message ApplicationStatusProto { @@ -98,7 +104,7 @@ message ApplicationMasterProto { optional int32 rpc_port = 3; optional string trackingUrl = 4; optional ApplicationStatusProto status = 5; - optional ApplicationStateProto state = 6; + optional YarnApplicationStateProto state = 6; optional string client_token = 7; optional int32 containerCount = 8; optional int32 amFailCount = 9; @@ -107,7 +113,7 @@ message ApplicationMasterProto { message URLProto { optional string scheme = 1; - optional string host = 2; + optional string host = 2; optional int32 port = 3; optional string file = 4; } @@ -140,12 +146,13 @@ message ApplicationReportProto { optional int32 rpc_port = 6; optional string client_token = 7; optional ApplicationStatusProto status = 8; - optional ApplicationStateProto state = 9; + optional YarnApplicationStateProto yarn_application_state = 9; optional ContainerProto masterContainer = 10; optional string trackingUrl = 11; 
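NOTE (editorial, not part of the patch): the ProtoUtils hunk above maps the new FinalApplicationStatus enum to FinalApplicationStatusProto by adding an "APP_" prefix, presumably because proto2 enum constants share one namespace within the .proto file and FAILED/KILLED are already taken by YarnApplicationStateProto. A small round-trip sketch of the two converters added above (all names are taken from the patch itself):

    // Java enum -> proto enum: SUCCEEDED becomes APP_SUCCEEDED
    FinalApplicationStatusProto proto = ProtoUtils.convertToProtoFormat(FinalApplicationStatus.SUCCEEDED);
    // proto enum -> Java enum: the APP_ prefix is stripped again, giving back SUCCEEDED
    FinalApplicationStatus status = ProtoUtils.convertFromProtoFormat(proto);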
optional string diagnostics = 12 [default = "N/A"]; optional int64 startTime = 13; optional int64 finishTime = 14; + optional FinalApplicationStatusProto final_application_status = 15; } message NodeIdProto { @@ -195,7 +202,7 @@ message AMResponseProto { message ApplicationSubmissionContextProto { optional ApplicationIdProto application_id = 1; optional string application_name = 2 [default = "N/A"]; - optional string user = 3; + optional string user = 3; optional string queue = 4 [default = "default"]; optional PriorityProto priority = 5; optional ContainerLaunchContextProto am_container_spec = 6; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 1a992ad578..f477048ca7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -42,7 +42,7 @@ message FinishApplicationMasterRequestProto { optional ApplicationAttemptIdProto application_attempt_id = 1; optional string diagnostics = 2; optional string tracking_url = 3; - optional string final_state = 4; + optional FinalApplicationStatusProto final_application_status = 4; } message FinishApplicationMasterResponseProto { @@ -115,7 +115,7 @@ message GetClusterNodesRequestProto { } message GetClusterNodesResponseProto { - repeated NodeReportProto nodeReports = 1; + repeated NodeReportProto nodeReports = 1; } message GetQueueInfoRequestProto { @@ -133,7 +133,7 @@ message GetQueueUserAclsInfoRequestProto { } message GetQueueUserAclsInfoResponseProto { - repeated QueueUserACLInfoProto queueUserAcls = 1; + repeated QueueUserACLInfoProto queueUserAcls = 1; } ////////////////////////////////////////////////////// diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml new file mode 100644 index 0000000000..406ec436d5 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml @@ -0,0 +1,105 @@ + + + + + hadoop-yarn-applications + org.apache.hadoop + ${yarn.version} + + 4.0.0 + org.apache.hadoop + hadoop-yarn-applications-distributedshell + hadoop-yarn-applications-distributedshell + + + ${project.artifact.file} + ${project.parent.parent.basedir} + + + + + org.apache.hadoop + hadoop-yarn-api + ${yarn.version} + + + org.apache.hadoop + hadoop-yarn-common + ${yarn.version} + + + org.apache.hadoop + hadoop-yarn-server-nodemanager + test + ${yarn.version} + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + ${yarn.version} + + + org.apache.hadoop + hadoop-yarn-server-common + test + ${yarn.version} + + + org.apache.hadoop + hadoop-yarn-server-tests + test-jar + test + ${yarn.version} + + + + + + + maven-jar-plugin + + + + jar + + + test-compile + + + + + maven-dependency-plugin + + + build-classpath + generate-sources + + build-classpath + + + + target/classes/yarn-apps-ds-generated-classpath + + + + + + + + + diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java new file mode 100644 index 0000000000..19800ba91a --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -0,0 +1,831 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.applications.distributedshell; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Vector; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.AMRMProtocol; +import org.apache.hadoop.yarn.api.ApplicationConstants; +import org.apache.hadoop.yarn.api.ContainerManager; + +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; +//import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; +//import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; + +import org.apache.hadoop.yarn.api.records.AMResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import 
org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.LocalResourceType; +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnRemoteException; +import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.util.ConverterUtils; +import org.apache.hadoop.yarn.util.Records; + +/** + * An ApplicationMaster for executing shell commands on a set of launched containers using the YARN framework. + * + *

This class is meant to act as an example of how to write YARN-based application masters.

+ * + *

The ApplicationMaster is started on a container by the ResourceManager's launcher. + * The first thing that the ApplicationMaster needs to do is connect to and register itself with + * the ResourceManager. The registration sets up information within the ResourceManager + * regarding the host:port on which the ApplicationMaster is listening to provide services to a client, + * as well as a tracking URL that a client can use to keep track of status/job history if needed.
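As a rough illustration of the registration step described above, the sketch below mirrors the registerToRM() helper added later in this file. The method name is illustrative, and it assumes the ApplicationMaster fields defined in this patch (resourceManager, appAttemptID, appMasterHostname, appMasterRpcPort, appMasterTrackingUrl) rather than being standalone code.

```java
// Minimal registration sketch, assuming this class's fields and imports.
private RegisterApplicationMasterResponse registerWithRM() throws YarnRemoteException {
  RegisterApplicationMasterRequest request =
      Records.newRecord(RegisterApplicationMasterRequest.class);
  // Identify this attempt and tell the RM where the AM can be reached.
  request.setApplicationAttemptId(appAttemptID);
  request.setHost(appMasterHostname);
  request.setRpcPort(appMasterRpcPort);
  request.setTrackingUrl(appMasterTrackingUrl);
  // The response carries cluster capability info such as min/max container memory.
  return resourceManager.registerApplicationMaster(request);
}
```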

+ * + *

The ApplicationMaster needs to send a heartbeat to the ResourceManager at regular intervals + * to inform the ResourceManager that it is up and alive. The {@link AMRMProtocol#allocate} call from the + * ApplicationMaster to the ResourceManager acts as this heartbeat. + * + *
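Because the allocate call doubles as the heartbeat, an AM that momentarily has nothing to ask for can still ping the RM with an empty request. A minimal sketch, assuming this class's resourceManager, appAttemptID, and rmRequestID fields; the helper name is illustrative.

```java
// Empty allocate call used purely as a liveness signal; no new asks, no releases.
private AMResponse sendHeartbeat(float progress) throws YarnRemoteException {
  AllocateRequest req = Records.newRecord(AllocateRequest.class);
  req.setResponseId(rmRequestID.incrementAndGet());
  req.setApplicationAttemptId(appAttemptID);
  req.setProgress(progress);
  // An empty ask/release set still counts as a heartbeat to the RM.
  return resourceManager.allocate(req).getAMResponse();
}
```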

For the actual handling of the job, the ApplicationMaster has to request the required number of containers from the + * ResourceManager via an {@link AllocateRequest}, using {@link ResourceRequest} + * with the necessary resource specifications such as node location and computational (memory/disk/cpu) resource requirements. + * The ResourceManager responds with an {@link AllocateResponse} that informs the ApplicationMaster + * of the set of newly allocated containers, completed containers, as well as the current state of available resources.
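To make the ask concrete: a ResourceRequest names a host (or "*" for any host), a priority, a memory capability, and a container count, and travels to the RM inside an AllocateRequest. The sketch below condenses the setupContainerAskForRM/sendContainerAskToRM logic from later in this file; it assumes the same fields (requestPriority, containerMemory, appAttemptID, rmRequestID, resourceManager) plus java.util.Collections, and the helper name is illustrative.

```java
// Ask the RM for numContainers containers anywhere in the cluster.
private AMResponse askForContainers(int numContainers) throws YarnRemoteException {
  ResourceRequest ask = Records.newRecord(ResourceRequest.class);
  ask.setHostName("*");                    // any host will do for the shell command
  ask.setNumContainers(numContainers);

  Priority pri = Records.newRecord(Priority.class);
  pri.setPriority(requestPriority);
  ask.setPriority(pri);

  Resource capability = Records.newRecord(Resource.class);
  capability.setMemory(containerMemory);   // memory is the only supported dimension here
  ask.setCapability(capability);

  AllocateRequest req = Records.newRecord(AllocateRequest.class);
  req.setResponseId(rmRequestID.incrementAndGet());
  req.setApplicationAttemptId(appAttemptID);
  req.addAllAsks(Collections.singletonList(ask));
  return resourceManager.allocate(req).getAMResponse();
}
```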

+ * + *

For each allocated container, the ApplicationMaster can then set up the necessary launch context via + * {@link ContainerLaunchContext} to specify the allocated container id, the local resources required by the executable, + * the environment to be set up for the executable, the commands to execute, etc., and submit a {@link StartContainerRequest} + * to the {@link ContainerManager} to launch and execute the defined commands on the given allocated container.
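A stripped-down version of that launch step, mirroring LaunchContainerRunnable.run() further down in this file; it assumes this class's rpc and conf fields, uses an illustrative helper name, and omits local resources and user setup for brevity.

```java
// Launch a single shell command on an allocated container via its NodeManager.
private void launchShellOnContainer(Container container, String command)
    throws YarnRemoteException {
  // Connect to the ContainerManager running on the container's node.
  String cmIpPortStr = container.getNodeId().getHost() + ":"
      + container.getNodeId().getPort();
  ContainerManager cm = (ContainerManager) rpc.getProxy(
      ContainerManager.class, NetUtils.createSocketAddr(cmIpPortStr), conf);

  ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
  ctx.setContainerId(container.getId());
  ctx.setResource(container.getResource());
  // Redirect stdout/stderr into the container's log directory.
  ctx.setCommands(Collections.singletonList(
      command + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"
              + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

  StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
  startReq.setContainerLaunchContext(ctx);
  cm.startContainer(startReq);
}
```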

+ * + *

The ApplicationMaster can monitor a launched container either by querying the ResourceManager + * using {@link AMRMProtocol#allocate} to get updates on completed containers, or by querying the {@link ContainerManager} + * for the status of the allocated container's {@link ContainerId}. + * + *
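The second monitoring path, querying the ContainerManager directly, is only present as commented-out code in this patch. The following is a hedged sketch of what it would look like, based on the GetContainerStatusRequest/GetContainerStatusResponse types referenced above; the helper name is illustrative.

```java
// Poll a ContainerManager for the status of one container. This AM actually relies
// on the RM-side completed-container reports instead, since the shells are short-lived.
private ContainerStatus queryContainerStatus(ContainerManager cm, ContainerId containerId)
    throws YarnRemoteException {
  GetContainerStatusRequest statusReq =
      Records.newRecord(GetContainerStatusRequest.class);
  statusReq.setContainerId(containerId);
  GetContainerStatusResponse statusResp = cm.getContainerStatus(statusReq);
  return statusResp.getStatus();
}
```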

After the job has been completed, the ApplicationMaster has to send a {@link FinishApplicationMasterRequest} + * to the ResourceManager to inform it that the ApplicationMaster has been completed. + */ +public class ApplicationMaster { + + private static final Log LOG = LogFactory.getLog(ApplicationMaster.class); + + // Configuration + private Configuration conf; + // YARN RPC to communicate with the Resource Manager or Node Manager + private YarnRPC rpc; + + // Handle to communicate with the Resource Manager + private AMRMProtocol resourceManager; + + // Application Attempt Id ( combination of attemptId and fail count ) + private ApplicationAttemptId appAttemptID; + + // TODO + // For status update for clients - yet to be implemented + // Hostname of the container + private String appMasterHostname = ""; + // Port on which the app master listens for status update requests from clients + private int appMasterRpcPort = 0; + // Tracking url to which app master publishes info for clients to monitor + private String appMasterTrackingUrl = ""; + + // App Master configuration + // No. of containers to run shell command on + private int numTotalContainers = 1; + // Memory to request for the container on which the shell command will run + private int containerMemory = 10; + // Priority of the request + private int requestPriority; + + // Incremental counter for rpc calls to the RM + private AtomicInteger rmRequestID = new AtomicInteger(); + + // Simple flag to denote whether all works is done + private boolean appDone = false; + // Counter for completed containers ( complete denotes successful or failed ) + private AtomicInteger numCompletedContainers = new AtomicInteger(); + // Allocated container count so that we know how many containers has the RM + // allocated to us + private AtomicInteger numAllocatedContainers = new AtomicInteger(); + // Count of failed containers + private AtomicInteger numFailedContainers = new AtomicInteger(); + // Count of containers already requested from the RM + // Needed as once requested, we should not request for containers again and again. + // Only request for more if the original requirement changes. 
+ private AtomicInteger numRequestedContainers = new AtomicInteger(); + + // Shell command to be executed + private String shellCommand = ""; + // Args to be passed to the shell command + private String shellArgs = ""; + // Env variables to be setup for the shell command + private Map shellEnv = new HashMap(); + + // Location of shell script ( obtained from info set in env ) + // Shell script path in fs + private String shellScriptPath = ""; + // Timestamp needed for creating a local resource + private long shellScriptPathTimestamp = 0; + // File length needed for local resource + private long shellScriptPathLen = 0; + + // Hardcoded path to shell script in launch container's local env + private final String ExecShellStringPath = "ExecShellScript.sh"; + + // Containers to be released + private CopyOnWriteArrayList releasedContainers = new CopyOnWriteArrayList(); + + // Launch threads + private List launchThreads = new ArrayList(); + + /** + * @param args Command line args + */ + public static void main(String[] args) { + boolean result = false; + try { + ApplicationMaster appMaster = new ApplicationMaster(); + LOG.info("Initializing ApplicationMaster"); + boolean doRun = appMaster.init(args); + if (!doRun) { + System.exit(0); + } + result = appMaster.run(); + } catch (Throwable t) { + LOG.fatal("Error running ApplicationMaster", t); + System.exit(1); + } + if (result) { + LOG.info("Application Master completed successfully. exiting"); + System.exit(0); + } + else { + LOG.info("Application Master failed. exiting"); + System.exit(2); + } + } + + /** + * Dump out contents of $CWD and the environment to stdout for debugging + */ + private void dumpOutDebugInfo() { + + LOG.info("Dump debug output"); + Map envs = System.getenv(); + for (Map.Entry env : envs.entrySet()) { + LOG.info("System env: key=" + env.getKey() + ", val=" + env.getValue()); + System.out.println("System env: key=" + env.getKey() + ", val=" + env.getValue()); + } + + String cmd = "ls -al"; + Runtime run = Runtime.getRuntime(); + Process pr = null; + try { + pr = run.exec(cmd); + pr.waitFor(); + + BufferedReader buf = new BufferedReader(new InputStreamReader(pr.getInputStream())); + String line = ""; + while ((line=buf.readLine())!=null) { + LOG.info("System CWD content: " + line); + System.out.println("System CWD content: " + line); + } + buf.close(); + } catch (IOException e) { + e.printStackTrace(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + public ApplicationMaster() throws Exception { + // Set up the configuration and RPC + conf = new Configuration(); + rpc = YarnRPC.create(conf); + } + /** + * Parse command line options + * @param args Command line args + * @return Whether init successful and run should be invoked + * @throws ParseException + * @throws IOException + */ + public boolean init(String[] args) throws ParseException, IOException { + + Options opts = new Options(); + opts.addOption("app_attempt_id", true, "App Attempt ID. Not to be used unless for testing purposes"); + opts.addOption("shell_command", true, "Shell command to be executed by the Application Master"); + opts.addOption("shell_script", true, "Location of the shell script to be executed"); + opts.addOption("shell_args", true, "Command line args for the shell script"); + opts.addOption("shell_env", true, "Environment for shell script. 
Specified as env_key=env_val pairs"); + opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command"); + opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed"); + opts.addOption("priority", true, "Application Priority. Default 0"); + opts.addOption("debug", false, "Dump out debug information"); + + opts.addOption("help", false, "Print usage"); + CommandLine cliParser = new GnuParser().parse(opts, args); + + if (args.length == 0) { + printUsage(opts); + throw new IllegalArgumentException("No args specified for application master to initialize"); + } + + if (cliParser.hasOption("help")) { + printUsage(opts); + return false; + } + + if (cliParser.hasOption("debug")) { + dumpOutDebugInfo(); + } + + Map envs = System.getenv(); + + appAttemptID = Records.newRecord(ApplicationAttemptId.class); + if (!envs.containsKey(ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV)) { + if (cliParser.hasOption("app_attempt_id")) { + String appIdStr = cliParser.getOptionValue("app_attempt_id", ""); + appAttemptID = ConverterUtils.toApplicationAttemptId(appIdStr); + } + else { + throw new IllegalArgumentException("Application Attempt Id not set in the environment"); + } + } else { + appAttemptID = ConverterUtils.toApplicationAttemptId(envs.get(ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV)); + } + + LOG.info("Application master for app" + + ", appId=" + appAttemptID.getApplicationId().getId() + + ", clustertimestamp=" + appAttemptID.getApplicationId().getClusterTimestamp() + + ", attemptId=" + appAttemptID.getAttemptId()); + + if (!cliParser.hasOption("shell_command")) { + throw new IllegalArgumentException("No shell command specified to be executed by application master"); + } + shellCommand = cliParser.getOptionValue("shell_command"); + + if (cliParser.hasOption("shell_args")) { + shellArgs = cliParser.getOptionValue("shell_args"); + } + if (cliParser.hasOption("shell_env")) { + String shellEnvs[] = cliParser.getOptionValues("shell_env"); + for (String env : shellEnvs) { + env = env.trim(); + int index = env.indexOf('='); + if (index == -1) { + shellEnv.put(env, ""); + continue; + } + String key = env.substring(0, index); + String val = ""; + if (index < (env.length()-1)) { + val = env.substring(index+1); + } + shellEnv.put(key, val); + } + } + + if (envs.containsKey(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION)) { + shellScriptPath = envs.get(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION); + + if (envs.containsKey(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP)) { + shellScriptPathTimestamp = Long.valueOf(envs.get(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP)); + } + if (envs.containsKey(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN)) { + shellScriptPathLen = Long.valueOf(envs.get(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN)); + } + + if (!shellScriptPath.isEmpty() + && (shellScriptPathTimestamp <= 0 + || shellScriptPathLen <= 0)) { + LOG.error("Illegal values in env for shell script path" + + ", path=" + shellScriptPath + + ", len=" + shellScriptPathLen + + ", timestamp=" + shellScriptPathTimestamp); + throw new IllegalArgumentException("Illegal values in env for shell script path"); + } + } + + containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10")); + numTotalContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1")); + requestPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0")); + + return true; + } + + /** + * Helper function to print usage + * @param opts Parsed 
command line options + */ + private void printUsage(Options opts) { + new HelpFormatter().printHelp("ApplicationMaster", opts); + } + + /** + * Main run function for the application master + * @throws YarnRemoteException + */ + public boolean run() throws YarnRemoteException { + LOG.info("Starting ApplicationMaster"); + + // Connect to ResourceManager + resourceManager = connectToRM(); + + // Setup local RPC Server to accept status requests directly from clients + // TODO need to setup a protocol for client to be able to communicate to the RPC server + // TODO use the rpc port info to register with the RM for the client to send requests to this app master + + // Register self with ResourceManager + RegisterApplicationMasterResponse response = registerToRM(); + // Dump out information about cluster capability as seen by the resource manager + int minMem = response.getMinimumResourceCapability().getMemory(); + int maxMem = response.getMaximumResourceCapability().getMemory(); + LOG.info("Min mem capabililty of resources in this cluster " + minMem); + LOG.info("Max mem capabililty of resources in this cluster " + maxMem); + + // A resource ask has to be atleast the minimum of the capability of the cluster, the value has to be + // a multiple of the min value and cannot exceed the max. + // If it is not an exact multiple of min, the RM will allocate to the nearest multiple of min + if (containerMemory < minMem) { + LOG.info("Container memory specified below min threshold of cluster. Using min value." + + ", specified=" + containerMemory + + ", min=" + minMem); + containerMemory = minMem; + } + else if (containerMemory > maxMem) { + LOG.info("Container memory specified above max threshold of cluster. Using max value." + + ", specified=" + containerMemory + + ", max=" + maxMem); + containerMemory = maxMem; + } + + // Setup heartbeat emitter + // TODO poll RM every now and then with an empty request to let RM know that we are alive + // The heartbeat interval after which an AM is timed out by the RM is defined by a config setting: + // RM_AM_EXPIRY_INTERVAL_MS with default defined by DEFAULT_RM_AM_EXPIRY_INTERVAL_MS + // The allocate calls to the RM count as heartbeats so, for now, this additional heartbeat emitter + // is not required. + + // Setup ask for containers from RM + // Send request for containers to RM + // Until we get our fully allocated quota, we keep on polling RM for containers + // Keep looping until all the containers are launched and shell script executed on them + // ( regardless of success/failure). + + int loopCounter = -1; + + while (numCompletedContainers.get() < numTotalContainers + && !appDone) { + loopCounter++; + + // log current state + LOG.info("Current application state: loop=" + loopCounter + + ", appDone=" + appDone + + ", total=" + numTotalContainers + + ", requested=" + numRequestedContainers + + ", completed=" + numCompletedContainers + + ", failed=" + numFailedContainers + + ", currentAllocated=" + numAllocatedContainers); + + // Sleep before each loop when asking RM for containers + // to avoid flooding RM with spurious requests when it + // need not have any available containers + // Sleeping for 1000 ms. + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + LOG.info("Sleep interrupted " + e.getMessage()); + } + + // No. 
of containers to request + // For the first loop, askCount will be equal to total containers needed + // From that point on, askCount will always be 0 as current implementation + // does not change its ask on container failures. + int askCount = numTotalContainers - numRequestedContainers.get(); + numRequestedContainers.addAndGet(askCount); + + // Setup request to be sent to RM to allocate containers + List resourceReq = new ArrayList(); + if (askCount > 0) { + ResourceRequest containerAsk = setupContainerAskForRM(askCount); + resourceReq.add(containerAsk); + } + + // Send the request to RM + LOG.info("Asking RM for containers" + + ", askCount=" + askCount); + AMResponse amResp = sendContainerAskToRM(resourceReq); + + // Retrieve list of allocated containers from the response + List allocatedContainers = amResp.getAllocatedContainers(); + LOG.info("Got response from RM for container ask, allocatedCnt=" + allocatedContainers.size()); + numAllocatedContainers.addAndGet(allocatedContainers.size()); + for (Container allocatedContainer : allocatedContainers) { + LOG.info("Launching shell command on a new container." + + ", containerId=" + allocatedContainer.getId() + + ", containerNode=" + allocatedContainer.getNodeId().getHost() + + ":" + allocatedContainer.getNodeId().getPort() + + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress() + + ", containerState" + allocatedContainer.getState() + + ", containerResourceMemory" + allocatedContainer.getResource().getMemory()); + // + ", containerToken" + allocatedContainer.getContainerToken().getIdentifier().toString()); + + LaunchContainerRunnable runnableLaunchContainer = new LaunchContainerRunnable(allocatedContainer); + Thread launchThread = new Thread(runnableLaunchContainer); + + // launch and start the container on a separate thread to keep the main thread unblocked + // as all containers may not be allocated at one go. + launchThreads.add(launchThread); + launchThread.start(); + } + + // Check what the current available resources in the cluster are + // TODO should we do anything if the available resources are not enough? + Resource availableResources = amResp.getAvailableResources(); + LOG.info("Current available resources in the cluster " + availableResources); + + // Check the completed containers + List completedContainers = amResp.getCompletedContainersStatuses(); + LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size()); + for (ContainerStatus containerStatus : completedContainers) { + LOG.info("Got container status for containerID= " + containerStatus.getContainerId() + + ", state=" + containerStatus.getState() + + ", exitStatus=" + containerStatus.getExitStatus() + + ", diagnostics=" + containerStatus.getDiagnostics()); + + // non complete containers should not be here + assert(containerStatus.getState() == ContainerState.COMPLETE); + + // increment counters for completed/failed containers + int exitStatus = containerStatus.getExitStatus(); + if (0 != exitStatus) { + // container failed + if (-100 != exitStatus) { + // shell script failed + // counts as completed + numCompletedContainers.incrementAndGet(); + numFailedContainers.incrementAndGet(); + } + else { + // something else bad happened + // app job did not complete for some reason + // we should re-try as the container was lost for some reason + numAllocatedContainers.decrementAndGet(); + numRequestedContainers.decrementAndGet(); + // we do not need to release the container as it would be done + // by the RM/CM. 
+ } + } + else { + // nothing to do + // container completed successfully + numCompletedContainers.incrementAndGet(); + LOG.info("Container completed successfully." + + ", containerId=" + containerStatus.getContainerId()); + } + + } + if (numCompletedContainers.get() == numTotalContainers) { + appDone = true; + } + + LOG.info("Current application state: loop=" + loopCounter + + ", appDone=" + appDone + + ", total=" + numTotalContainers + + ", requested=" + numRequestedContainers + + ", completed=" + numCompletedContainers + + ", failed=" + numFailedContainers + + ", currentAllocated=" + numAllocatedContainers); + + // TODO + // Add a timeout handling layer + // for misbehaving shell commands + } + + // Join all launched threads + // needed for when we time out + // and we need to release containers + for (Thread launchThread : launchThreads) { + try { + launchThread.join(10000); + } catch (InterruptedException e) { + LOG.info("Exception thrown in thread join: " + e.getMessage()); + e.printStackTrace(); + } + } + + // When the application completes, it should send a finish application signal + // to the RM + LOG.info("Application completed. Signalling finish to RM"); + + FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class); + finishReq.setAppAttemptId(appAttemptID); + boolean isSuccess = true; + if (numFailedContainers.get() == 0) { + finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED); + } + else { + finishReq.setFinishApplicationStatus(FinalApplicationStatus.FAILED); + String diagnostics = "Diagnostics." + + ", total=" + numTotalContainers + + ", completed=" + numCompletedContainers.get() + + ", allocated=" + numAllocatedContainers.get() + + ", failed=" + numFailedContainers.get(); + finishReq.setDiagnostics(diagnostics); + isSuccess = false; + } + resourceManager.finishApplicationMaster(finishReq); + return isSuccess; + } + + /** + * Thread to connect to the {@link ContainerManager} and + * launch the container that will execute the shell command. + */ + private class LaunchContainerRunnable implements Runnable { + + // Allocated container + Container container; + // Handle to communicate with ContainerManager + ContainerManager cm; + + /** + * @param lcontainer Allocated container + */ + public LaunchContainerRunnable(Container lcontainer) { + this.container = lcontainer; + } + + /** + * Helper function to connect to CM + */ + private void connectToCM() { + String cmIpPortStr = container.getNodeId().getHost() + ":" + + container.getNodeId().getPort(); + InetSocketAddress cmAddress = NetUtils.createSocketAddr(cmIpPortStr); + LOG.info("Connecting to ResourceManager at " + cmIpPortStr); + this.cm = ((ContainerManager) rpc.getProxy(ContainerManager.class, cmAddress, conf)); + } + + + @Override + /** + * Connects to CM, sets up container launch context + * for shell command and eventually dispatches the container + * start request to the CM. 
+ */ + public void run() { + // Connect to ContainerManager + LOG.info("Connecting to container manager for containerid=" + container.getId()); + connectToCM(); + + LOG.info("Setting up container launch container for containerid=" + container.getId()); + ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class); + + ctx.setContainerId(container.getId()); + ctx.setResource(container.getResource()); + + try { + ctx.setUser(UserGroupInformation.getCurrentUser().getShortUserName()); + } catch (IOException e) { + LOG.info("Getting current user info failed when trying to launch the container" + + e.getMessage()); + } + + // Set the environment + ctx.setEnvironment(shellEnv); + + // Set the local resources + Map localResources = new HashMap(); + + // The container for the eventual shell commands needs its own local resources too. + // In this scenario, if a shell script is specified, we need to have it copied + // and made available to the container. + if (!shellScriptPath.isEmpty()) { + LocalResource shellRsrc = Records.newRecord(LocalResource.class); + shellRsrc.setType(LocalResourceType.FILE); + shellRsrc.setVisibility(LocalResourceVisibility.APPLICATION); + try { + shellRsrc.setResource(ConverterUtils.getYarnUrlFromURI(new URI(shellScriptPath))); + } catch (URISyntaxException e) { + LOG.error("Error when trying to use shell script path specified in env" + + ", path=" + shellScriptPath); + e.printStackTrace(); + + // A failure scenario on bad input such as invalid shell script path + // We know we cannot continue launching the container + // so we should release it. + // TODO + numCompletedContainers.incrementAndGet(); + numFailedContainers.incrementAndGet(); + return; + } + shellRsrc.setTimestamp(shellScriptPathTimestamp); + shellRsrc.setSize(shellScriptPathLen); + localResources.put(ExecShellStringPath, shellRsrc); + } + ctx.setLocalResources(localResources); + + // Set the necessary command to execute on the allocated container + Vector vargs = new Vector(5); + + // Set executable command + vargs.add(shellCommand); + // Set shell script path + if (!shellScriptPath.isEmpty()) { + vargs.add(ExecShellStringPath); + } + + // Set args for the shell command if any + vargs.add(shellArgs); + // Add log redirect params + // TODO + // We should redirect the output to hdfs instead of local logs + // so as to be able to look at the final output after the containers + // have been released. + // Could use a path suffixed with /AppId/AppAttempId/ContainerId/std[out|err] + vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"); + vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"); + + // Get final commmand + StringBuilder command = new StringBuilder(); + for (CharSequence str : vargs) { + command.append(str).append(" "); + } + + List commands = new ArrayList(); + commands.add(command.toString()); + ctx.setCommands(commands); + + StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class); + startReq.setContainerLaunchContext(ctx); + try { + cm.startContainer(startReq); + } catch (YarnRemoteException e) { + LOG.info("Start container failed for :" + + ", containerId=" + container.getId()); + e.printStackTrace(); + // TODO do we need to release this container? + } + + // Get container status? 
+ // Left commented out as the shell scripts are short lived + // and we are relying on the status for completed containers from RM to detect status + + // GetContainerStatusRequest statusReq = Records.newRecord(GetContainerStatusRequest.class); + // statusReq.setContainerId(container.getId()); + // GetContainerStatusResponse statusResp; + // try { + // statusResp = cm.getContainerStatus(statusReq); + // LOG.info("Container Status" + // + ", id=" + container.getId() + // + ", status=" +statusResp.getStatus()); + // } catch (YarnRemoteException e) { + // e.printStackTrace(); + // } + } + } + + /** + * Connect to the Resource Manager + * @return Handle to communicate with the RM + */ + private AMRMProtocol connectToRM() { + YarnConfiguration yarnConf = new YarnConfiguration(conf); + InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get( + YarnConfiguration.RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS)); + LOG.info("Connecting to ResourceManager at " + rmAddress); + return ((AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf)); + } + + /** + * Register the Application Master to the Resource Manager + * @return the registration response from the RM + * @throws YarnRemoteException + */ + private RegisterApplicationMasterResponse registerToRM() throws YarnRemoteException { + RegisterApplicationMasterRequest appMasterRequest = Records.newRecord(RegisterApplicationMasterRequest.class); + + // set the required info into the registration request: + // application attempt id, + // host on which the app master is running + // rpc port on which the app master accepts requests from the client + // tracking url for the app master + appMasterRequest.setApplicationAttemptId(appAttemptID); + appMasterRequest.setHost(appMasterHostname); + appMasterRequest.setRpcPort(appMasterRpcPort); + appMasterRequest.setTrackingUrl(appMasterTrackingUrl); + + return resourceManager.registerApplicationMaster(appMasterRequest); + } + + /** + * Setup the request that will be sent to the RM for the container ask. + * @param numContainers Containers to ask for from RM + * @return the setup ResourceRequest to be sent to RM + */ + private ResourceRequest setupContainerAskForRM(int numContainers) { + ResourceRequest request = Records.newRecord(ResourceRequest.class); + + // setup requirements for hosts + // whether a particular rack/host is needed + // Refer to apis under org.apache.hadoop.net for more + // details on how to get figure out rack/host mapping. + // using * as any host will do for the distributed shell app + request.setHostName("*"); + + // set no. of containers needed + request.setNumContainers(numContainers); + + // set the priority for the request + Priority pri = Records.newRecord(Priority.class); + // TODO - what is the range for priority? how to decide? + pri.setPriority(requestPriority); + request.setPriority(pri); + + // Set up resource type requirements + // For now, only memory is supported so we set memory requirements + Resource capability = Records.newRecord(Resource.class); + capability.setMemory(containerMemory); + request.setCapability(capability); + + return request; + } + + /** + * Ask RM to allocate given no. 
of containers to this Application Master + * @param requestedContainers Containers to ask for from RM + * @return Response from RM to AM with allocated containers + * @throws YarnRemoteException + */ + private AMResponse sendContainerAskToRM(List requestedContainers) + throws YarnRemoteException { + AllocateRequest req = Records.newRecord(AllocateRequest.class); + req.setResponseId(rmRequestID.incrementAndGet()); + req.setApplicationAttemptId(appAttemptID); + req.addAllAsks(requestedContainers); + req.addAllReleases(releasedContainers); + req.setProgress((float)numCompletedContainers.get()/numTotalContainers); + + LOG.info("Sending request to RM for containers" + + ", requestedSet=" + requestedContainers.size() + + ", releasedSet=" + releasedContainers.size() + + ", progress=" + req.getProgress()); + + for (ResourceRequest rsrcReq : requestedContainers) { + LOG.info("Requested container ask: " + rsrcReq.toString()); + } + for (ContainerId id : releasedContainers) { + LOG.info("Released container, id=" + id.getId()); + } + + AllocateResponse resp = resourceManager.allocate(req); + return resp.getAMResponse(); + } +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java new file mode 100644 index 0000000000..caccb2615c --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -0,0 +1,789 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.applications.distributedshell; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Vector; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.SecurityInfo; +import org.apache.hadoop.yarn.api.ApplicationConstants; +import org.apache.hadoop.yarn.api.ClientRMProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.LocalResourceType; +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; +import org.apache.hadoop.yarn.api.records.NodeReport; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.QueueACL; +import org.apache.hadoop.yarn.api.records.QueueInfo; +import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnRemoteException; +import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.security.client.ClientRMSecurityInfo; +import org.apache.hadoop.yarn.util.ConverterUtils; +import org.apache.hadoop.yarn.util.Records; + + +/** + * Client for Distributed Shell application submission to YARN. + * + *

The distributed shell client launches an application master that, in turn, runs + * the provided shell command on a set of containers.

+ * + *

This client is meant to act as an example of how to write YARN-based applications.

+ * + *

To submit an application, a client first needs to connect to the ResourceManager, + * aka the ApplicationsManager or ASM, via the {@link ClientRMProtocol}. The {@link ClientRMProtocol} + * gives the client access to cluster information and a way to request a + * new {@link ApplicationId}.
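A minimal sketch of that first exchange, modeled on the connectToASM()/getApplication() helpers this Client calls; their bodies are not part of the lines shown here, so the RM address keys (YarnConfiguration.RM_ADDRESS / DEFAULT_RM_ADDRESS) and the getNewApplication method on ClientRMProtocol are assumptions, and the helper name is illustrative.

```java
// Connect to the RM's client-facing interface and ask for a new ApplicationId.
private ApplicationId requestNewApplicationId() throws YarnRemoteException {
  YarnConfiguration yarnConf = new YarnConfiguration(conf);
  InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(
      YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS)); // assumed keys
  ClientRMProtocol applicationsManager =
      (ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf);

  GetNewApplicationRequest request = Records.newRecord(GetNewApplicationRequest.class);
  GetNewApplicationResponse response = applicationsManager.getNewApplication(request); // assumed method
  return response.getApplicationId();
}
```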

+ * + *

For the actual job submission, the client first has to create an {@link ApplicationSubmissionContext}. + * The {@link ApplicationSubmissionContext} defines the application details such as {@link ApplicationId} + * and application name, user submitting the application, the priority assigned to the application and the queue + * to which this application needs to be assigned. In addition to this, the {@link ApplicationSubmissionContext} + * also defines the {@link ContainerLaunchContext} which describes the Container with which + * the {@link ApplicationMaster} is launched.
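Condensed from the submission code later in this file; the ApplicationSubmissionContext setters are the ones this patch uses, while the SubmitApplicationRequest setter and the submitApplication call fall outside the lines shown here and are assumptions. The helper name is illustrative and the fields (appName, amQueue, amPriority, applicationsManager) are the ones this Client defines.

```java
// Build the submission context and hand the application over to the RM/ASM.
private void submitToRM(ApplicationId appId, ContainerLaunchContext amContainer)
    throws YarnRemoteException {
  ApplicationSubmissionContext appContext =
      Records.newRecord(ApplicationSubmissionContext.class);
  appContext.setApplicationId(appId);
  appContext.setApplicationName(appName);
  appContext.setQueue(amQueue);

  Priority pri = Records.newRecord(Priority.class);
  pri.setPriority(amPriority);
  appContext.setPriority(pri);

  // The AM's own launch context: jar, environment, java command, memory ask.
  appContext.setAMContainerSpec(amContainer);

  SubmitApplicationRequest request = Records.newRecord(SubmitApplicationRequest.class);
  request.setApplicationSubmissionContext(appContext);  // assumed setter name
  applicationsManager.submitApplication(request);       // assumed method shape
}
```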

+ * + *

The {@link ContainerLaunchContext} in this scenario defines the resources to be allocated for the + * {@link ApplicationMaster}'s container, the local resources (jars, configuration files) to be made available + * and the environment to be set for the {@link ApplicationMaster} and the commands to be executed to run the + * {@link ApplicationMaster}.
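For the local-resources part specifically, the pattern used further down for AppMaster.jar is: copy the file into the cluster filesystem, then describe it with a LocalResource carrying the timestamp and length so the NodeManager can sanity-check the copy. A condensed sketch assuming this class's conf field; the helper name and parameters are illustrative.

```java
// Copy a local jar into the cluster filesystem and register it as a LocalResource.
private void addJarResource(Map<String, LocalResource> localResources,
    String localJarPath, String pathSuffix) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path src = new Path(localJarPath);
  Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
  fs.copyFromLocalFile(false, true, src, dst);

  FileStatus destStatus = fs.getFileStatus(dst);
  LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
  amJarRsrc.setType(LocalResourceType.FILE);                    // no unpacking needed
  amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); // private to this app
  amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
  amJarRsrc.setTimestamp(destStatus.getModificationTime());     // lets the NM verify the copy
  amJarRsrc.setSize(destStatus.getLen());
  localResources.put("AppMaster.jar", amJarRsrc);
}
```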

+ * + *

Using the {@link ApplicationSubmissionContext}, the client submits the application to the + * ResourceManager and then monitors the application by requesting an {@link ApplicationReport} + * from the ResourceManager at regular time intervals. If the application takes too long, the client + * kills the application by submitting a {@link KillApplicationRequest} to the ResourceManager.
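The monitoring loop itself is outside the lines shown in this hunk, so the following is only a hedged sketch of what the description above implies. The ApplicationReport accessors (getYarnApplicationState, getFinalApplicationStatus), the request setters, and the forceKillApplication method name are assumptions based on the record types this patch introduces; the helper name is illustrative.

```java
// Poll the RM until the application reaches a terminal state, killing it on timeout.
private boolean monitorApplication(ApplicationId appId, long timeoutMillis)
    throws YarnRemoteException {
  long deadline = System.currentTimeMillis() + timeoutMillis;
  while (true) {
    GetApplicationReportRequest reportReq =
        Records.newRecord(GetApplicationReportRequest.class);
    reportReq.setApplicationId(appId);                           // assumed setter
    ApplicationReport report =
        applicationsManager.getApplicationReport(reportReq).getApplicationReport();

    YarnApplicationState state = report.getYarnApplicationState(); // assumed accessor
    if (state == YarnApplicationState.FINISHED) {
      return report.getFinalApplicationStatus() == FinalApplicationStatus.SUCCEEDED;
    }
    if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) {
      return false;
    }
    if (System.currentTimeMillis() > deadline) {
      KillApplicationRequest killReq = Records.newRecord(KillApplicationRequest.class);
      killReq.setApplicationId(appId);                           // assumed setter
      applicationsManager.forceKillApplication(killReq);         // assumed method name
      return false;
    }
    try { Thread.sleep(1000); } catch (InterruptedException ignored) { }
  }
}
```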

+ * + */ +public class Client { + + private static final Log LOG = LogFactory.getLog(Client.class); + + // Configuration + private Configuration conf; + + // RPC to communicate to RM + private YarnRPC rpc; + + // Handle to talk to the Resource Manager/Applications Manager + private ClientRMProtocol applicationsManager; + + // Application master specific info to register a new Application with RM/ASM + private String appName = ""; + // App master priority + private int amPriority = 0; + // Queue for App master + private String amQueue = ""; + // User to run app master as + private String amUser = ""; + // Amt. of memory resource to request for to run the App Master + private int amMemory = 10; + + // Application master jar file + private String appMasterJar = ""; + // Main class to invoke application master + private String appMasterMainClass = ""; + + // Shell command to be executed + private String shellCommand = ""; + // Location of shell script + private String shellScriptPath = ""; + // Args to be passed to the shell command + private String shellArgs = ""; + // Env variables to be setup for the shell command + private Map shellEnv = new HashMap(); + // Shell Command Container priority + private int shellCmdPriority = 0; + + // Amt of memory to request for container in which shell script will be executed + private int containerMemory = 10; + // No. of containers in which the shell script needs to be executed + private int numContainers = 1; + + // log4j.properties file + // if available, add to local resources and set into classpath + private String log4jPropFile = ""; + + // Start time for client + private final long clientStartTime = System.currentTimeMillis(); + // Timeout threshold for client. Kill app after time interval expires. + private long clientTimeout = 600000; + + // Debug flag + boolean debugFlag = false; + + /** + * @param args Command line arguments + */ + public static void main(String[] args) { + boolean result = false; + try { + Client client = new Client(); + LOG.info("Initializing Client"); + boolean doRun = client.init(args); + if (!doRun) { + System.exit(0); + } + result = client.run(); + } catch (Throwable t) { + LOG.fatal("Error running CLient", t); + System.exit(1); + } + if (result) { + LOG.info("Application completed successfully"); + System.exit(0); + } + LOG.error("Application failed to complete successfully"); + System.exit(2); + } + + /** + */ + public Client() throws Exception { + // Set up the configuration and RPC + conf = new Configuration(); + rpc = YarnRPC.create(conf); + } + + /** + * Helper function to print out usage + * @param opts Parsed command line options + */ + private void printUsage(Options opts) { + new HelpFormatter().printHelp("Client", opts); + } + + /** + * Parse command line options + * @param args Parsed command line options + * @return Whether the init was successful to run the client + */ + public boolean init(String[] args) throws ParseException { + + Options opts = new Options(); + opts.addOption("appname", true, "Application Name. Default value - DistributedShell"); + opts.addOption("priority", true, "Application Priority. 
Default 0"); + opts.addOption("queue", true, "RM Queue in which this application is to be submitted"); + opts.addOption("user", true, "User to run the application as"); + opts.addOption("timeout", true, "Application timeout in milliseconds"); + opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master"); + opts.addOption("jar", true, "Jar file containing the application master"); + opts.addOption("class", true, "Main class to be run for the Application Master."); + opts.addOption("shell_command", true, "Shell command to be executed by the Application Master"); + opts.addOption("shell_script", true, "Location of the shell script to be executed"); + opts.addOption("shell_args", true, "Command line args for the shell script"); + opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs"); + opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers"); + opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command"); + opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed"); + opts.addOption("log_properties", true, "log4j.properties file"); + opts.addOption("debug", false, "Dump out debug information"); + opts.addOption("help", false, "Print usage"); + CommandLine cliParser = new GnuParser().parse(opts, args); + + if (args.length == 0) { + printUsage(opts); + throw new IllegalArgumentException("No args specified for client to initialize"); + } + + if (cliParser.hasOption("help")) { + printUsage(opts); + return false; + } + + if (cliParser.hasOption("debug")) { + debugFlag = true; + + } + + appName = cliParser.getOptionValue("appname", "DistributedShell"); + amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0")); + amQueue = cliParser.getOptionValue("queue", ""); + amUser = cliParser.getOptionValue("user", ""); + amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10")); + + if (amMemory < 0) { + throw new IllegalArgumentException("Invalid memory specified for application master, exiting." 
+ + " Specified memory=" + amMemory); + } + + if (!cliParser.hasOption("jar")) { + throw new IllegalArgumentException("No jar file specified for application master"); + } + + appMasterJar = cliParser.getOptionValue("jar"); + appMasterMainClass = cliParser.getOptionValue("class", + "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster"); + + if (!cliParser.hasOption("shell_command")) { + throw new IllegalArgumentException("No shell command specified to be executed by application master"); + } + shellCommand = cliParser.getOptionValue("shell_command"); + + if (cliParser.hasOption("shell_script")) { + shellScriptPath = cliParser.getOptionValue("shell_script"); + } + if (cliParser.hasOption("shell_args")) { + shellArgs = cliParser.getOptionValue("shell_args"); + } + if (cliParser.hasOption("shell_env")) { + String envs[] = cliParser.getOptionValues("shell_env"); + for (String env : envs) { + env = env.trim(); + int index = env.indexOf('='); + if (index == -1) { + shellEnv.put(env, ""); + continue; + } + String key = env.substring(0, index); + String val = ""; + if (index < (env.length()-1)) { + val = env.substring(index+1); + } + shellEnv.put(key, val); + } + } + shellCmdPriority = Integer.parseInt(cliParser.getOptionValue("shell_cmd_priority", "0")); + + containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10")); + numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1")); + + if (containerMemory < 0 || numContainers < 1) { + throw new IllegalArgumentException("Invalid no. of containers or container memory specified, exiting." + + " Specified containerMemory=" + containerMemory + + ", numContainer=" + numContainers); + } + + clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000")); + + log4jPropFile = cliParser.getOptionValue("log_properties", ""); + + return true; + } + + /** + * Main run function for the client + * @return true if application completed successfully + * @throws IOException + */ + public boolean run() throws IOException { + LOG.info("Starting Client"); + + // Connect to ResourceManager + connectToASM(); + assert(applicationsManager != null); + + // Use ClientRMProtocol handle to general cluster information + GetClusterMetricsRequest clusterMetricsReq = Records.newRecord(GetClusterMetricsRequest.class); + GetClusterMetricsResponse clusterMetricsResp = applicationsManager.getClusterMetrics(clusterMetricsReq); + LOG.info("Got Cluster metric info from ASM" + + ", numNodeManagers=" + clusterMetricsResp.getClusterMetrics().getNumNodeManagers()); + + GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class); + GetClusterNodesResponse clusterNodesResp = applicationsManager.getClusterNodes(clusterNodesReq); + LOG.info("Got Cluster node info from ASM"); + for (NodeReport node : clusterNodesResp.getNodeReports()) { + LOG.info("Got node report from ASM for" + + ", nodeId=" + node.getNodeId() + + ", nodeAddress" + node.getHttpAddress() + + ", nodeRackName" + node.getRackName() + + ", nodeNumContainers" + node.getNumContainers() + + ", nodeHealthStatus" + node.getNodeHealthStatus()); + } + + GetQueueInfoRequest queueInfoReq = Records.newRecord(GetQueueInfoRequest.class); + GetQueueInfoResponse queueInfoResp = applicationsManager.getQueueInfo(queueInfoReq); + QueueInfo queueInfo = queueInfoResp.getQueueInfo(); + LOG.info("Queue info" + + ", queueName=" + queueInfo.getQueueName() + + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + + ", queueMaxCapacity=" + 
queueInfo.getMaximumCapacity() + + ", queueApplicationCount=" + queueInfo.getApplications().size() + + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); + + GetQueueUserAclsInfoRequest queueUserAclsReq = Records.newRecord(GetQueueUserAclsInfoRequest.class); + GetQueueUserAclsInfoResponse queueUserAclsResp = applicationsManager.getQueueUserAcls(queueUserAclsReq); + List listAclInfo = queueUserAclsResp.getUserAclsInfoList(); + for (QueueUserACLInfo aclInfo : listAclInfo) { + for (QueueACL userAcl : aclInfo.getUserAcls()) { + LOG.info("User ACL Info for Queue" + + ", queueName=" + aclInfo.getQueueName() + + ", userAcl=" + userAcl.name()); + } + } + + // Get a new application id + GetNewApplicationResponse newApp = getApplication(); + ApplicationId appId = newApp.getApplicationId(); + + // TODO get min/max resource capabilities from RM and change memory ask if needed + // If we do not have min/max, we may not be able to correctly request + // the required resources from the RM for the app master + // Memory ask has to be a multiple of min and less than max. + // Dump out information about cluster capability as seen by the resource manager + int minMem = newApp.getMinimumResourceCapability().getMemory(); + int maxMem = newApp.getMaximumResourceCapability().getMemory(); + LOG.info("Min mem capabililty of resources in this cluster " + minMem); + LOG.info("Max mem capabililty of resources in this cluster " + maxMem); + + // A resource ask has to be atleast the minimum of the capability of the cluster, the value has to be + // a multiple of the min value and cannot exceed the max. + // If it is not an exact multiple of min, the RM will allocate to the nearest multiple of min + if (amMemory < minMem) { + LOG.info("AM memory specified below min threshold of cluster. Using min value." + + ", specified=" + amMemory + + ", min=" + minMem); + amMemory = minMem; + } + else if (amMemory > maxMem) { + LOG.info("AM memory specified above max threshold of cluster. Using max value." 
+ + ", specified=" + amMemory + + ", max=" + maxMem); + amMemory = maxMem; + } + + // Create launch context for app master + LOG.info("Setting up application submission context for ASM"); + ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); + + // set the application id + appContext.setApplicationId(appId); + // set the application name + appContext.setApplicationName(appName); + + // Set up the container launch context for the application master + ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); + + // set local resources for the application master + // local files or archives as needed + // In this scenario, the jar file for the application master is part of the local resources + Map localResources = new HashMap(); + + LOG.info("Copy App Master jar from local filesystem and add to local environment"); + // Copy the application master jar to the filesystem + // Create a local resource to point to the destination jar path + FileSystem fs = FileSystem.get(conf); + Path src = new Path(appMasterJar); + String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar"; + Path dst = new Path(fs.getHomeDirectory(), pathSuffix); + fs.copyFromLocalFile(false, true, src, dst); + FileStatus destStatus = fs.getFileStatus(dst); + LocalResource amJarRsrc = Records.newRecord(LocalResource.class); + + // Set the type of resource - file or archive + // archives are untarred at destination + // we don't need the jar file to be untarred for now + amJarRsrc.setType(LocalResourceType.FILE); + // Set visibility of the resource + // Setting to most private option + amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); + // Set the resource to be copied over + amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); + // Set timestamp and length of file so that the framework + // can do basic sanity checks for the local resource + // after it has been copied over to ensure it is the same + // resource the client intended to use with the application + amJarRsrc.setTimestamp(destStatus.getModificationTime()); + amJarRsrc.setSize(destStatus.getLen()); + localResources.put("AppMaster.jar", amJarRsrc); + + // Set the log4j properties if needed + if (!log4jPropFile.isEmpty()) { + Path log4jSrc = new Path(log4jPropFile); + Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props"); + fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); + FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); + LocalResource log4jRsrc = Records.newRecord(LocalResource.class); + log4jRsrc.setType(LocalResourceType.FILE); + log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); + log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); + log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); + log4jRsrc.setSize(log4jFileStatus.getLen()); + localResources.put("log4j.properties", log4jRsrc); + } + + // The shell script has to be made available on the final container(s) + // where it will be executed. + // To do this, we need to first copy into the filesystem that is visible + // to the yarn framework. + // We do not need to set this as a local resource for the application + // master as the application master does not need it. 
+ String hdfsShellScriptLocation = ""; + long hdfsShellScriptLen = 0; + long hdfsShellScriptTimestamp = 0; + if (!shellScriptPath.isEmpty()) { + Path shellSrc = new Path(shellScriptPath); + String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh"; + Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); + fs.copyFromLocalFile(false, true, shellSrc, shellDst); + hdfsShellScriptLocation = shellDst.toUri().toString(); + FileStatus shellFileStatus = fs.getFileStatus(shellDst); + hdfsShellScriptLen = shellFileStatus.getLen(); + hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); + } + + // Set local resource info into app master container launch context + amContainer.setLocalResources(localResources); + + // Set the necessary security tokens as needed + //amContainer.setContainerTokens(containerToken); + + // Set the env variables to be setup in the env where the application master will be run + LOG.info("Set the environment for the application master"); + Map env = new HashMap(); + + // put location of shell script into env + // using the env info, the application master will create the correct local resource for the + // eventual containers that will be launched to execute the shell scripts + env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); + env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); + env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); + + // Add AppMaster.jar location to classpath + // At some point we should not be required to add + // the hadoop specific classpaths to the env. + // It should be provided out of the box. + // For now setting all required classpaths including + // the classpath to "." 
for the application jar + String classPathEnv = "${CLASSPATH}" + + ":./*" + + ":$HADOOP_CONF_DIR" + + ":$HADOOP_COMMON_HOME/share/hadoop/common/*" + + ":$HADOOP_COMMON_HOME/share/hadoop/common/lib/*" + + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/*" + + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*" + + ":$YARN_HOME/modules/*" + + ":$YARN_HOME/lib/*" + + ":./log4j.properties:"; + + // add the runtime classpath needed for tests to work + String testRuntimeClassPath = Client.getTestRuntimeClasspath(); + classPathEnv += ":" + testRuntimeClassPath; + + env.put("CLASSPATH", classPathEnv); + + amContainer.setEnvironment(env); + + // Set the necessary command to execute the application master + Vector<CharSequence> vargs = new Vector<CharSequence>(30); + + // Set java executable command + LOG.info("Setting up app master command"); + vargs.add("${JAVA_HOME}" + "/bin/java"); + // Set class name + vargs.add(appMasterMainClass); + // Set params for Application Master + vargs.add("--container_memory " + String.valueOf(containerMemory)); + vargs.add("--num_containers " + String.valueOf(numContainers)); + vargs.add("--priority " + String.valueOf(shellCmdPriority)); + if (!shellCommand.isEmpty()) { + vargs.add("--shell_command " + shellCommand + ""); + } + if (!shellArgs.isEmpty()) { + vargs.add("--shell_args " + shellArgs + ""); + } + for (Map.Entry<String, String> entry : shellEnv.entrySet()) { + vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); + } + if (debugFlag) { + vargs.add("--debug"); + } + + vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); + vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); + + // Get final command + StringBuilder command = new StringBuilder(); + for (CharSequence str : vargs) { + command.append(str).append(" "); + } + + LOG.info("Completed setting up app master command " + command.toString()); + List<String> commands = new ArrayList<String>(); + commands.add(command.toString()); + amContainer.setCommands(commands); + + // For launching an AM Container, setting user here is not needed + // Set user in ApplicationSubmissionContext + // amContainer.setUser(amUser); + + // Set up resource type requirements + // For now, only memory is supported so we set memory requirements + Resource capability = Records.newRecord(Resource.class); + capability.setMemory(amMemory); + amContainer.setResource(capability); + + // Service data is a binary blob that can be passed to the application + // Not needed in this scenario + // amContainer.setServiceData(serviceData); + + // The following are not required for launching an application master + // amContainer.setContainerId(containerId); + + appContext.setAMContainerSpec(amContainer); + + // Set the priority for the application master + Priority pri = Records.newRecord(Priority.class); + // TODO - what is the range for priority? how to decide? + pri.setPriority(amPriority); + appContext.setPriority(pri); + + // Set the queue to which this application is to be submitted in the RM + appContext.setQueue(amQueue); + // Set the user submitting this application + // TODO can it be empty?
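
For reference, the loop above flattens vargs into a single shell command line; the self-contained sketch below shows the shape of the string that ends up in amContainer.setCommands(). The main-class name, the sample argument values and the "<LOG_DIR>" placeholder are assumptions, not values taken from this patch.

import java.util.ArrayList;
import java.util.List;

public class AmCommandSketch {
  public static void main(String[] args) {
    // Assumed values; the real ones come from the Client's command-line options.
    String appMasterMainClass =
        "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster";
    String logDirVar = "<LOG_DIR>"; // stands in for ApplicationConstants.LOG_DIR_EXPANSION_VAR

    List<CharSequence> vargs = new ArrayList<CharSequence>();
    vargs.add("${JAVA_HOME}/bin/java");
    vargs.add(appMasterMainClass);
    vargs.add("--container_memory 1536");
    vargs.add("--num_containers 2");
    vargs.add("--priority 0");
    vargs.add("--shell_command ls");
    // stdout/stderr are redirected by the shell that launches the container
    vargs.add("1>" + logDirVar + "/AppMaster.stdout");
    vargs.add("2>" + logDirVar + "/AppMaster.stderr");

    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
      command.append(str).append(" ");
    }
    // Prints the assembled launch command, e.g.
    // ${JAVA_HOME}/bin/java org.apache...ApplicationMaster --container_memory 1536 ...
    System.out.println(command.toString().trim());
  }
}
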
+ appContext.setUser(amUser); + + // Create the request to send to the applications manager + SubmitApplicationRequest appRequest = Records.newRecord(SubmitApplicationRequest.class); + appRequest.setApplicationSubmissionContext(appContext); + + // Submit the application to the applications manager + // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); + // Ignore the response as either a valid response object is returned on success + // or an exception thrown to denote some form of a failure + LOG.info("Submitting application to ASM"); + applicationsManager.submitApplication(appRequest); + + // TODO + // Try submitting the same request again + // app submission failure? + + // Monitor the application + return monitorApplication(appId); + + } + + /** + * Monitor the submitted application for completion. + * Kill application if time expires. + * @param appId Application Id of application to be monitored + * @return true if application completed successfully + * @throws YarnRemoteException + */ + private boolean monitorApplication(ApplicationId appId) throws YarnRemoteException { + + while (true) { + + // Check app status every 1 second. + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + LOG.debug("Thread sleep in monitoring loop interrupted"); + } + + // Get application report for the appId we are interested in + GetApplicationReportRequest reportRequest = Records.newRecord(GetApplicationReportRequest.class); + reportRequest.setApplicationId(appId); + GetApplicationReportResponse reportResponse = applicationsManager.getApplicationReport(reportRequest); + ApplicationReport report = reportResponse.getApplicationReport(); + + LOG.info("Got application report from ASM for" + + ", appId=" + appId.getId() + + ", clientToken=" + report.getClientToken() + + ", appDiagnostics=" + report.getDiagnostics() + + ", appMasterHost=" + report.getHost() + + ", appQueue=" + report.getQueue() + + ", appMasterRpcPort=" + report.getRpcPort() + + ", appStartTime=" + report.getStartTime() + + ", yarnAppState=" + report.getYarnApplicationState().toString() + + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() + + ", appTrackingUrl=" + report.getTrackingUrl() + + ", appUser=" + report.getUser()); + + YarnApplicationState state = report.getYarnApplicationState(); + FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); + if (YarnApplicationState.FINISHED == state) { + if (FinalApplicationStatus.SUCCEEDED == dsStatus) { + LOG.info("Application has completed successfully. Breaking monitoring loop"); + return true; + } + else { + LOG.info("Application finished unsuccessfully." + + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + + ". Breaking monitoring loop"); + return false; + } + } + else if (YarnApplicationState.KILLED == state + || YarnApplicationState.FAILED == state) { + LOG.info("Application did not finish." + + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + + ". Breaking monitoring loop"); + return false; + } + + if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { + LOG.info("Reached client specified timeout for application. Killing application"); + killApplication(appId); + return false; + } + } + + } + + /** + * Kill a submitted application by sending a call to the ASM + * @param appId Application Id to be killed.
+ * @throws YarnRemoteException + */ + private void killApplication(ApplicationId appId) throws YarnRemoteException { + KillApplicationRequest request = Records.newRecord(KillApplicationRequest.class); + // TODO clarify whether multiple jobs with the same app id can be submitted and be running at + // the same time. + // If yes, can we kill a particular attempt only? + request.setApplicationId(appId); + // KillApplicationResponse response = applicationsManager.forceKillApplication(request); + // Response can be ignored as it is non-null on success or + // throws an exception in case of failures + applicationsManager.forceKillApplication(request); + } + + /** + * Connect to the Resource Manager/Applications Manager and initialize the handle used to communicate with the ASM. + * @throws IOException + */ + private void connectToASM() throws IOException { + + /* + UserGroupInformation user = UserGroupInformation.getCurrentUser(); + applicationsManager = user.doAs(new PrivilegedAction<ClientRMProtocol>() { + public ClientRMProtocol run() { + InetSocketAddress rmAddress = NetUtils.createSocketAddr(conf.get( + YarnConfiguration.RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS)); + LOG.info("Connecting to ResourceManager at " + rmAddress); + Configuration appsManagerServerConf = new Configuration(conf); + appsManagerServerConf.setClass(YarnConfiguration.YARN_SECURITY_INFO, + ClientRMSecurityInfo.class, SecurityInfo.class); + ClientRMProtocol asm = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, appsManagerServerConf)); + return asm; + } + }); + */ + YarnConfiguration yarnConf = new YarnConfiguration(conf); + InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get( + YarnConfiguration.RM_ADDRESS, + YarnConfiguration.DEFAULT_RM_ADDRESS)); + LOG.info("Connecting to ResourceManager at " + rmAddress); + applicationsManager = ((ClientRMProtocol) rpc.getProxy( + ClientRMProtocol.class, rmAddress, conf)); + } + + /** + * Get a new application from the ASM + * @return New Application + * @throws YarnRemoteException + */ + private GetNewApplicationResponse getApplication() throws YarnRemoteException { + GetNewApplicationRequest request = Records.newRecord(GetNewApplicationRequest.class); + GetNewApplicationResponse response = applicationsManager.getNewApplication(request); + LOG.info("Got new application id=" + response.getApplicationId()); + return response; + } + + private static String getTestRuntimeClasspath() { + + InputStream classpathFileStream = null; + BufferedReader reader = null; + String envClassPath = ""; + + LOG.info("Trying to generate classpath for app master from current thread's classpath"); + try { + + // Create classpath from generated classpath + // Check maven pom.xml for generated classpath info + // Works if compile time env is same as runtime. Mainly tests. + ClassLoader thisClassLoader = + Thread.currentThread().getContextClassLoader(); + String generatedClasspathFile = "yarn-apps-ds-generated-classpath"; + classpathFileStream = + thisClassLoader.getResourceAsStream(generatedClasspathFile); + if (classpathFileStream == null) { + LOG.info("Could not load classpath resource from class loader"); + return envClassPath; + } + LOG.info("Readable bytes from stream=" + classpathFileStream.available()); + reader = new BufferedReader(new InputStreamReader(classpathFileStream)); + String cp = reader.readLine(); + if (cp != null) { + envClassPath += cp.trim() + ":"; + } + // Put the file itself on classpath for tasks.
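
getTestRuntimeClasspath() relies on a build-time contract: the test build is assumed to place a one-line resource named yarn-apps-ds-generated-classpath on the classpath, whose single line is a ':'-separated list of jar paths. The standalone sketch below only illustrates that read; the printed sample path is hypothetical.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

public class GeneratedClasspathSketch {
  public static void main(String[] args) throws IOException {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    InputStream in = loader.getResourceAsStream("yarn-apps-ds-generated-classpath");
    if (in == null) {
      // Outside the test build the resource is simply absent.
      System.out.println("generated classpath resource not found");
      return;
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    try {
      // Single line, e.g. "/repo/.../hadoop-common.jar:/repo/.../hadoop-hdfs.jar"
      String cp = reader.readLine();
      System.out.println("test runtime classpath = " + cp);
    } finally {
      reader.close();
      in.close();
    }
  }
}
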
+ envClassPath += thisClassLoader.getResource(generatedClasspathFile).getFile(); + } catch (IOException e) { + LOG.info("Could not find the necessary resource to generate class path for tests. Error=" + e.getMessage()); + } + + try { + if (classpathFileStream != null) { + classpathFileStream.close(); + } + if (reader != null) { + reader.close(); + } + } catch (IOException e) { + LOG.info("Failed to close class path file stream or reader. Error=" + e.getMessage()); + } + return envClassPath; + } + +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/DSConstants.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/DSConstants.java new file mode 100644 index 0000000000..b2fb81d0d9 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/DSConstants.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.applications.distributedshell; + +/** + * Constants used in both Client and Application Master + */ +public class DSConstants { + + /** + * Environment key name pointing to the shell script's location + */ + public static final String DISTRIBUTEDSHELLSCRIPTLOCATION = "DISTRIBUTEDSHELLSCRIPTLOCATION"; + + /** + * Environment key name denoting the file timestamp for the shell script. + * Used to validate the local resource. + */ + public static final String DISTRIBUTEDSHELLSCRIPTTIMESTAMP = "DISTRIBUTEDSHELLSCRIPTTIMESTAMP"; + + /** + * Environment key name denoting the file content length for the shell script. + * Used to validate the local resource. 
+ */ + public static final String DISTRIBUTEDSHELLSCRIPTLEN = "DISTRIBUTEDSHELLSCRIPTLEN"; +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java new file mode 100644 index 0000000000..d0407daa2b --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.applications.distributedshell; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.server.MiniYARNCluster; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestDistributedShell { + + private static final Log LOG = + LogFactory.getLog(TestDistributedShell.class); + + protected static MiniYARNCluster yarnCluster = null; + protected static Configuration conf = new Configuration(); + + protected static String APPMASTER_JAR = "../hadoop-yarn-applications-distributedshell/target/hadoop-yarn-applications-distributedshell-0.24.0-SNAPSHOT.jar"; + + @BeforeClass + public static void setup() throws InterruptedException, IOException { + LOG.info("Starting up YARN cluster"); + if (yarnCluster == null) { + yarnCluster = new MiniYARNCluster(TestDistributedShell.class.getName()); + yarnCluster.init(conf); + yarnCluster.start(); + } + try { + Thread.sleep(2000); + } catch (InterruptedException e) { + LOG.info("setup thread sleep interrupted. message=" + e.getMessage()); + } + } + + @AfterClass + public static void tearDown() throws IOException { + if (yarnCluster != null) { + yarnCluster.stop(); + yarnCluster = null; + } + } + + @Test + public void testDSShell() throws Exception { + + String[] args = { + "--jar", + APPMASTER_JAR, + "--num_containers", + "2", + "--shell_command", + "ls", + "--master_memory", + "1536", + "--container_memory", + "1536" + }; + + LOG.info("Initializing DS Client"); + Client client = new Client(); + boolean initSuccess = client.init(args); + assert(initSuccess); + LOG.info("Running DS Client"); + boolean result = client.run(); + + LOG.info("Client run completed. 
Result=" + result); + assert (result == true); + + } + + +} + diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/pom.xml new file mode 100644 index 0000000000..713731004f --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/pom.xml @@ -0,0 +1,30 @@ +<?xml version="1.0"?> +<project> + <parent> + <artifactId>hadoop-yarn</artifactId> + <groupId>org.apache.hadoop</groupId> + <version>${yarn.version}</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-applications</artifactId> + <name>hadoop-yarn-applications</name> + <packaging>pom</packaging> + + <modules> + <module>hadoop-yarn-applications-distributedshell</module> + </modules> +</project> diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index cb955af8c4..780561d81a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -44,14 +44,7 @@ public class YarnConfiguration extends Configuration { /** ACL of who can modify this application.*/ public static final String APPLICATION_ACL_MODIFY_APP = "yarn.app.acl.modify-job"; - - /** - * Security info class This is an internal config set and - * read by YARN itself. - */ - public static final String YARN_SECURITY_INFO = - "yarn.security.info.class"; - + /** Delay before deleting resource to ease debugging of NM issues */ public static final String DEBUG_NM_DELETE_DELAY_SEC = YarnConfiguration.NM_PREFIX + "delete.debug-delay-sec"; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/HadoopYarnProtoRPC.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/HadoopYarnProtoRPC.java index 37096ba54c..885682111e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/HadoopYarnProtoRPC.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/HadoopYarnProtoRPC.java @@ -45,12 +45,9 @@ public class HadoopYarnProtoRPC extends YarnRPC { @Override public Object getProxy(Class<?> protocol, InetSocketAddress addr, Configuration conf) { - Configuration myConf = new Configuration(conf); LOG.info("Creating a HadoopYarnProtoRpc proxy for protocol " + protocol); - LOG.debug("Configured SecurityInfo class name is " - + myConf.get(YarnConfiguration.YARN_SECURITY_INFO)); - - return RpcFactoryProvider.getClientFactory(myConf).getClient(protocol, 1, addr, myConf); + return RpcFactoryProvider.getClientFactory(conf).getClient(protocol, 1, + addr, conf); } @Override @@ -60,8 +57,6 @@ public Server getServer(Class<?> protocol, Object instance, int numHandlers) { LOG.info("Creating a HadoopYarnProtoRpc server for protocol " + protocol + " with " + numHandlers + " handlers"); - LOG.info("Configured SecurityInfo class name is " - + conf.get(YarnConfiguration.YARN_SECURITY_INFO)); final RPC.Server hadoopServer; hadoopServer = diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/HadoopYarnRPC.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/HadoopYarnRPC.java index 3358189a4a..9bd2ff03f3 100644 ---
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/HadoopYarnRPC.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/HadoopYarnRPC.java @@ -46,13 +46,10 @@ public class HadoopYarnRPC extends YarnRPC { @Override public Object getProxy(Class protocol, InetSocketAddress addr, Configuration conf) { - Configuration myConf = new Configuration(conf); LOG.info("Creating a HadoopYarnRpc proxy for protocol " + protocol); - LOG.debug("Configured SecurityInfo class name is " - + myConf.get(YarnConfiguration.YARN_SECURITY_INFO)); - RPC.setProtocolEngine(myConf, protocol, AvroSpecificRpcEngine.class); + RPC.setProtocolEngine(conf, protocol, AvroSpecificRpcEngine.class); try { - return RPC.getProxy(protocol, 1, addr, myConf); + return RPC.getProxy(protocol, 1, addr, conf); } catch (IOException e) { throw new YarnException(e); } @@ -65,8 +62,6 @@ public Server getServer(Class protocol, Object instance, int numHandlers) { LOG.info("Creating a HadoopYarnRpc server for protocol " + protocol + " with " + numHandlers + " handlers"); - LOG.info("Configured SecurityInfo class name is " - + conf.get(YarnConfiguration.YARN_SECURITY_INFO)); RPC.setProtocolEngine(conf, protocol, AvroSpecificRpcEngine.class); final RPC.Server hadoopServer; try { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java index c2510bca3d..313e8333b7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java @@ -44,7 +44,8 @@ public class ContainerTokenIdentifier extends TokenIdentifier { private String nmHostName; private Resource resource; - public ContainerTokenIdentifier(ContainerId containerID, String hostName, Resource r) { + public ContainerTokenIdentifier(ContainerId containerID, String hostName, + Resource r) { this.containerId = containerID; this.nmHostName = hostName; this.resource = r; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java index 7ec367292e..9c485bf3ef 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java @@ -24,9 +24,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -69,12 +70,12 @@ public int compare(ContainerId c1, } } - public 
static class ResourceRequestComparator + public static class ResourceRequestComparator implements java.util.Comparator { @Override public int compare(org.apache.hadoop.yarn.api.records.ResourceRequest r1, org.apache.hadoop.yarn.api.records.ResourceRequest r2) { - + // Compare priority, host and capability int ret = r1.getPriority().compareTo(r2.getPriority()); if (ret == 0) { @@ -198,12 +199,12 @@ public static Container newContainer(RecordFactory recordFactory, String nodeHttpAddress, Resource resource, Priority priority) { ContainerId containerID = newContainerId(recordFactory, appAttemptId, containerId); - return newContainer(containerID, nodeId, nodeHttpAddress, + return newContainer(containerID, nodeId, nodeHttpAddress, resource, priority); } public static Container newContainer(ContainerId containerId, - NodeId nodeId, String nodeHttpAddress, + NodeId nodeId, String nodeHttpAddress, Resource resource, Priority priority) { Container container = recordFactory.newRecordInstance(Container.class); container.setId(containerId); @@ -242,8 +243,9 @@ public static ResourceRequest newResourceRequest(ResourceRequest r) { public static ApplicationReport newApplicationReport( ApplicationId applicationId, String user, String queue, String name, - String host, int rpcPort, String clientToken, ApplicationState state, - String diagnostics, String url, long startTime, long finishTime) { + String host, int rpcPort, String clientToken, YarnApplicationState state, + String diagnostics, String url, long startTime, long finishTime, + FinalApplicationStatus finalStatus) { ApplicationReport report = recordFactory .newRecordInstance(ApplicationReport.class); report.setApplicationId(applicationId); @@ -253,20 +255,21 @@ public static ApplicationReport newApplicationReport( report.setHost(host); report.setRpcPort(rpcPort); report.setClientToken(clientToken); - report.setState(state); + report.setYarnApplicationState(state); report.setDiagnostics(diagnostics); report.setTrackingUrl(url); report.setStartTime(startTime); report.setFinishTime(finishTime); + report.setFinalApplicationStatus(finalStatus); return report; } - + public static Resource newResource(int memory) { Resource resource = recordFactory.newRecordInstance(Resource.class); resource.setMemory(memory); return resource; } - + public static URL newURL(String scheme, String host, int port, String file) { URL url = recordFactory.newRecordInstance(URL.class); url.setScheme(scheme); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java index 6f5e904319..296c9d75a1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java @@ -33,7 +33,9 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -181,4 +183,5 @@ public static ApplicationAttemptId 
toApplicationAttemptId( + applicationAttmeptIdStr, n); } } + } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java index 7d233e2d9f..ade32ffa28 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java @@ -24,7 +24,8 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.util.Records; import com.google.common.collect.Iterators; @@ -39,8 +40,8 @@ public class MockApps { "I18nApp<☯>"); static final Iterator USERS = Iterators.cycle("dorothy", "tinman", "scarecrow", "glinda", "nikko", "toto", "winkie", "zeke", "gulch"); - static final Iterator STATES = Iterators.cycle( - ApplicationState.values()); + static final Iterator STATES = Iterators.cycle( + YarnApplicationState.values()); static final Iterator QUEUES = Iterators.cycle("a.a1", "a.a2", "b.b1", "b.b2", "b.b3", "c.c1.c11", "c.c1.c12", "c.c1.c13", "c.c2", "c.c3", "c.c4"); @@ -74,46 +75,47 @@ public static List genApps(int n) { public static ApplicationReport newApp(int i) { final ApplicationId id = newAppID(i); - final ApplicationState state = newAppState(); + final YarnApplicationState state = newAppState(); final String user = newUserName(); final String name = newAppName(); final String queue = newQueue(); + final FinalApplicationStatus finishState = FinalApplicationStatus.UNDEFINED; return new ApplicationReport() { @Override public ApplicationId getApplicationId() { return id; } @Override public String getUser() { return user; } @Override public String getName() { return name; } - @Override public ApplicationState getState() { return state; } + @Override public YarnApplicationState getYarnApplicationState() { return state; } @Override public String getQueue() { return queue; } @Override public String getTrackingUrl() { return ""; } - @Override + @Override public FinalApplicationStatus getFinalApplicationStatus() { return finishState; } public void setApplicationId(ApplicationId applicationId) { // TODO Auto-generated method stub - + } @Override public void setTrackingUrl(String url) { // TODO Auto-generated method stub - + } @Override public void setName(String name) { // TODO Auto-generated method stub - + } @Override public void setQueue(String queue) { // TODO Auto-generated method stub - + } @Override - public void setState(ApplicationState state) { + public void setYarnApplicationState(YarnApplicationState state) { // TODO Auto-generated method stub - + } @Override public void setUser(String user) { // TODO Auto-generated method stub - + } @Override public String getDiagnostics() { @@ -123,7 +125,7 @@ public String getDiagnostics() { @Override public void setDiagnostics(String diagnostics) { // TODO Auto-generated method stub - + } @Override public String getHost() { @@ -133,7 +135,7 @@ public String getHost() { @Override public void setHost(String host) { // TODO Auto-generated method stub - + } @Override public int getRpcPort() { @@ 
-143,7 +145,7 @@ public int getRpcPort() { @Override public void setRpcPort(int rpcPort) { // TODO Auto-generated method stub - + } @Override public String getClientToken() { @@ -153,9 +155,8 @@ public String getClientToken() { @Override public void setClientToken(String clientToken) { // TODO Auto-generated method stub - + } - @Override public long getStartTime() { // TODO Auto-generated method stub @@ -175,7 +176,11 @@ public long getFinishTime() { @Override public void setFinishTime(long finishTime) { // TODO Auto-generated method stub - + + } + @Override + public void setFinalApplicationStatus(FinalApplicationStatus finishState) { + // TODO Auto-generated method stub } }; } @@ -194,9 +199,10 @@ public static ApplicationAttemptId newAppAttemptID(ApplicationId appId, int i) { return id; } - public static ApplicationState newAppState() { + public static YarnApplicationState newAppState() { synchronized(STATES) { return STATES.next(); } } + } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index b869729337..3896713295 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -23,6 +23,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; @@ -32,6 +33,13 @@ */ public interface Context { + /** + * Return the nodeId. Usable only when the ContainerManager is started. 
+ * + * @return the NodeId + */ + NodeId getNodeId(); + ConcurrentMap getApplications(); ConcurrentMap getContainers(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 8dc16e97df..068d6f5f8a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Dispatcher; @@ -48,6 +49,7 @@ import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager; import org.apache.hadoop.yarn.service.CompositeService; import org.apache.hadoop.yarn.service.Service; +import org.apache.hadoop.yarn.util.Records; public class NodeManager extends CompositeService { private static final Log LOG = LogFactory.getLog(NodeManager.class); @@ -161,6 +163,7 @@ public void stop() { public static class NMContext implements Context { + private final NodeId nodeId = Records.newRecord(NodeId.class); private final ConcurrentMap applications = new ConcurrentHashMap(); private final ConcurrentMap containers = @@ -175,6 +178,14 @@ public NMContext() { this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis()); } + /** + * Usable only after ContainerManager is started. 
+ */ + @Override + public NodeId getNodeId() { + return this.nodeId; + } + @Override public ConcurrentMap getApplications() { return this.applications; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java index caf3a72919..2b10c9717a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java @@ -24,8 +24,5 @@ public interface NodeStatusUpdater extends Service { byte[] getRMNMSharedSecret(); - String getContainerManagerBindAddress(); - void sendOutofBandHeartBeat(); - } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 1b1fd46b9e..f5d0c528b2 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -18,7 +18,6 @@ package org.apache.hadoop.yarn.server.nodemanager; -import java.net.InetAddress; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Iterator; @@ -57,7 +56,6 @@ import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager; import org.apache.hadoop.yarn.service.AbstractService; -import org.apache.hadoop.yarn.util.Records; public class NodeStatusUpdaterImpl extends AbstractService implements NodeStatusUpdater { @@ -69,16 +67,13 @@ public class NodeStatusUpdaterImpl extends AbstractService implements private final Context context; private final Dispatcher dispatcher; + private NodeId nodeId; private ContainerTokenSecretManager containerTokenSecretManager; private long heartBeatInterval; private ResourceTracker resourceTracker; private String rmAddress; private Resource totalResource; - private String containerManagerBindAddress; - private String hostName; - private int containerManagerPort; private int httpPort; - private NodeId nodeId; private byte[] secretKeyBytes = new byte[0]; private boolean isStopped; private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); @@ -114,24 +109,18 @@ public synchronized void init(Configuration conf) { @Override public void start() { - String cmBindAddressStr = - getConfig().get(YarnConfiguration.NM_ADDRESS, - YarnConfiguration.DEFAULT_NM_ADDRESS); - InetSocketAddress cmBindAddress = - NetUtils.createSocketAddr(cmBindAddressStr); + + // NodeManager is the last service to start, so NodeId is available. 
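
The NMContext change above turns NodeId into a record that is created empty and filled in later, so correctness depends on the order in which the NodeManager's services start. A small sketch of that ordering follows; it uses only record calls that appear in this patch, and the host and port values are made up.

import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.util.Records;

public class NodeIdOrderingSketch {
  public static void main(String[] args) {
    // 1. NMContext creates an empty NodeId record up front.
    NodeId nodeId = Records.newRecord(NodeId.class);
    // 2. ContainerManagerImpl.start() fills it in once its RPC server has bound
    //    (getCanonicalHostName() / server.getPort() in the real code).
    nodeId.setHost("nm-host.example.com");
    nodeId.setPort(45454);
    // 3. NodeStatusUpdaterImpl.start() runs later, so it can register with this id.
    System.out.println("register as " + nodeId.toString());
  }
}
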
+ this.nodeId = this.context.getNodeId(); + String httpBindAddressStr = getConfig().get(YarnConfiguration.NM_WEBAPP_ADDRESS, YarnConfiguration.DEFAULT_NM_WEBAPP_ADDRESS); InetSocketAddress httpBindAddress = NetUtils.createSocketAddr(httpBindAddressStr); try { - this.hostName = InetAddress.getLocalHost().getHostAddress(); - this.containerManagerPort = cmBindAddress.getPort(); + // this.hostName = InetAddress.getLocalHost().getCanonicalHostName(); this.httpPort = httpBindAddress.getPort(); - this.containerManagerBindAddress = - this.hostName + ":" + this.containerManagerPort; - LOG.info("Configured ContainerManager Address is " - + this.containerManagerBindAddress); // Registration has to be in start so that ContainerManager can get the // perNM tokens needed to authenticate ContainerTokens. registerWithRM(); @@ -150,14 +139,11 @@ public synchronized void stop() { } protected ResourceTracker getRMClient() { - YarnRPC rpc = YarnRPC.create(getConfig()); + Configuration conf = getConfig(); + YarnRPC rpc = YarnRPC.create(conf); InetSocketAddress rmAddress = NetUtils.createSocketAddr(this.rmAddress); - Configuration rmClientConf = new Configuration(getConfig()); - rmClientConf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - RMNMSecurityInfoClass.class, SecurityInfo.class); return (ResourceTracker) rpc.getProxy(ResourceTracker.class, rmAddress, - rmClientConf); + conf); } private void registerWithRM() throws YarnRemoteException { @@ -165,9 +151,6 @@ private void registerWithRM() throws YarnRemoteException { LOG.info("Connected to ResourceManager at " + this.rmAddress); RegisterNodeManagerRequest request = recordFactory.newRecordInstance(RegisterNodeManagerRequest.class); - this.nodeId = Records.newRecord(NodeId.class); - this.nodeId.setHost(this.hostName); - this.nodeId.setPort(this.containerManagerPort); request.setHttpPort(this.httpPort); request.setResource(this.totalResource); request.setNodeId(this.nodeId); @@ -183,19 +166,14 @@ private void registerWithRM() throws YarnRemoteException { // It is expected that status updater is started by this point and // RM gives the shared secret in registration during StatusUpdater#start(). 
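
getRMClient() above now follows the pattern this patch applies to every client-side proxy: build it straight from the service's own Configuration instead of cloning the conf and hand-setting yarn.security.info.class. A hedged sketch of that pattern is below; the class and method names are illustrative, while the calls mirror getRMClient().

import java.net.InetSocketAddress;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.server.api.ResourceTracker;

public class ResourceTrackerProxySketch {
  public static ResourceTracker connect(Configuration conf, String rmAddress) {
    YarnRPC rpc = YarnRPC.create(conf);
    InetSocketAddress addr = NetUtils.createSocketAddr(rmAddress);
    // The plain service conf is enough; the per-protocol SecurityInfo class is no
    // longer injected through a cloned Configuration.
    return (ResourceTracker) rpc.getProxy(ResourceTracker.class, addr, conf);
  }
}
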
this.containerTokenSecretManager.setSecretKey( - this.getContainerManagerBindAddress(), + this.nodeId.toString(), this.getRMNMSharedSecret()); } - LOG.info("Registered with ResourceManager as " + this.containerManagerBindAddress + LOG.info("Registered with ResourceManager as " + this.nodeId + " with total resource of " + this.totalResource); } - @Override - public String getContainerManagerBindAddress() { - return this.containerManagerBindAddress; - } - @Override public byte[] getRMNMSharedSecret() { return this.secretKeyBytes.clone(); @@ -230,8 +208,8 @@ private NodeStatus getNodeStatus() { } nodeStatus.setContainersStatuses(containersStatuses); - LOG.debug(this.containerManagerBindAddress + " sending out status for " + numActiveContainers - + " containers"); + LOG.debug(this.nodeId + " sending out status for " + + numActiveContainers + " containers"); NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus(); if (this.healthChecker != null) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 8e90552d86..3c92c0b53c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -21,7 +21,9 @@ import static org.apache.hadoop.yarn.service.Service.STATE.STARTED; import java.io.IOException; +import java.net.InetAddress; import java.net.InetSocketAddress; +import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.util.Map; @@ -36,6 +38,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.ContainerManager; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusResponse; @@ -99,7 +102,6 @@ public class ContainerManagerImpl extends CompositeService implements final Context context; private final ContainersMonitor containersMonitor; private Server server; - private InetSocketAddress cmBindAddressStr; private final ResourceLocalizationService rsrcLocalizationSrvc; private final ContainersLauncher containersLauncher; private final AuxServices auxiluaryServices; @@ -144,7 +146,7 @@ public ContainerManagerImpl(Context context, ContainerExecutor exec, addService(this.containersMonitor); LogAggregationService logAggregationService = - createLogAggregationService(this.deletionService); + createLogAggregationService(this.context, this.deletionService); addService(logAggregationService); dispatcher.register(ContainerEventType.class, @@ -159,9 +161,9 @@ public ContainerManagerImpl(Context context, ContainerExecutor exec, addService(dispatcher); } - protected LogAggregationService createLogAggregationService( + protected LogAggregationService createLogAggregationService(Context context, DeletionService deletionService) { - return new 
LogAggregationService(deletionService); + return new LogAggregationService(context, deletionService); } public ContainersMonitor getContainersMonitor() { @@ -179,29 +181,33 @@ protected ContainersLauncher createContainersLauncher(Context context, return new ContainersLauncher(context, this.dispatcher, exec); } - @Override - public void init(Configuration conf) { - cmBindAddressStr = NetUtils.createSocketAddr( - conf.get(YarnConfiguration.NM_ADDRESS, YarnConfiguration.DEFAULT_NM_ADDRESS)); - super.init(conf); - } - @Override public void start() { // Enqueue user dirs in deletion context - YarnRPC rpc = YarnRPC.create(getConfig()); - Configuration cmConf = new Configuration(getConfig()); - cmConf.setClass(YarnConfiguration.YARN_SECURITY_INFO, - ContainerManagerSecurityInfo.class, SecurityInfo.class); + Configuration conf = getConfig(); + YarnRPC rpc = YarnRPC.create(conf); + + InetSocketAddress initialAddress = NetUtils.createSocketAddr(conf.get( + YarnConfiguration.NM_ADDRESS, YarnConfiguration.DEFAULT_NM_ADDRESS)); + server = - rpc.getServer(ContainerManager.class, this, cmBindAddressStr, cmConf, + rpc.getServer(ContainerManager.class, this, initialAddress, conf, this.containerTokenSecretManager, - cmConf.getInt(YarnConfiguration.NM_CONTAINER_MGR_THREAD_COUNT, + conf.getInt(YarnConfiguration.NM_CONTAINER_MGR_THREAD_COUNT, YarnConfiguration.DEFAULT_NM_CONTAINER_MGR_THREAD_COUNT)); - LOG.info("ContainerManager started at " + cmBindAddressStr); server.start(); + InetAddress hostNameResolved = null; + try { + hostNameResolved = InetAddress.getLocalHost(); + } catch (UnknownHostException e) { + throw new YarnException(e); + } + this.context.getNodeId().setHost(hostNameResolved.getCanonicalHostName()); + this.context.getNodeId().setPort(server.getPort()); + LOG.info("ContainerManager started at " + + this.context.getNodeId().toString()); super.start(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java index fc302c2481..4c9a9bf521 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java @@ -114,15 +114,9 @@ public ContainerLocalizer(FileContext lfs, String user, String appId, } LocalizationProtocol getProxy(final InetSocketAddress nmAddr) { - Configuration localizerConf = new Configuration(); - YarnRPC rpc = YarnRPC.create(localizerConf); - if (UserGroupInformation.isSecurityEnabled()) { - localizerConf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - LocalizerSecurityInfo.class, SecurityInfo.class); - } + YarnRPC rpc = YarnRPC.create(conf); return (LocalizationProtocol) - rpc.getProxy(LocalizationProtocol.class, nmAddr, localizerConf); + rpc.getProxy(LocalizationProtocol.class, nmAddr, conf); } public int runLocalization(final InetSocketAddress nmAddr) diff --git 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java index ead650fab8..89d523436c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java @@ -227,13 +227,10 @@ LocalizerTracker createLocalizerTracker(Configuration conf) { } Server createServer() { - YarnRPC rpc = YarnRPC.create(getConfig()); - Configuration conf = new Configuration(getConfig()); // Clone to separate - // sec-info classes + Configuration conf = getConfig(); + YarnRPC rpc = YarnRPC.create(conf); LocalizerTokenSecretManager secretManager = null; if (UserGroupInformation.isSecurityEnabled()) { - conf.setClass(YarnConfiguration.YARN_SECURITY_INFO, - LocalizerSecurityInfo.class, SecurityInfo.class); secretManager = new LocalizerTokenSecretManager(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java index 974455c460..538bc4607f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation; import java.net.InetAddress; -import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.security.PrivilegedExceptionAction; import java.util.concurrent.ConcurrentHashMap; @@ -32,7 +31,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; @@ -42,6 +40,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.event.LogAggregatorEvent; import org.apache.hadoop.yarn.service.AbstractService; @@ -53,6 +52,7 @@ public class LogAggregationService extends AbstractService implements private static final Log LOG = LogFactory 
.getLog(LogAggregationService.class); + private final Context context; private final DeletionService deletionService; private String[] localRootLogDirs; @@ -63,8 +63,10 @@ public class LogAggregationService extends AbstractService implements private final ExecutorService threadPool; - public LogAggregationService(DeletionService deletionService) { + public LogAggregationService(Context context, + DeletionService deletionService) { super(LogAggregationService.class.getName()); + this.context = context; this.deletionService = deletionService; this.appLogAggregators = new ConcurrentHashMap(); @@ -82,16 +84,9 @@ public synchronized void init(Configuration conf) { @Override public synchronized void start() { - String address = - getConfig().get(YarnConfiguration.NM_ADDRESS, YarnConfiguration.DEFAULT_NM_ADDRESS); - InetSocketAddress cmBindAddress = NetUtils.createSocketAddr(address); - try { - this.nodeFile = - InetAddress.getLocalHost().getHostAddress() + "_" - + cmBindAddress.getPort(); - } catch (UnknownHostException e) { - throw new YarnException(e); - } + // NodeId is only available during start, the following cannot be moved + // anywhere else. + this.nodeFile = this.context.getNodeId().toString(); super.start(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java index c4db3b6af9..bdf8451761 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java @@ -144,9 +144,9 @@ public void handle(ContainersLauncherEvent event) { } @Override - protected LogAggregationService createLogAggregationService( + protected LogAggregationService createLogAggregationService(Context context, DeletionService deletionService) { - return new LogAggregationService(deletionService) { + return new LogAggregationService(context, deletionService) { @Override public void handle(LogAggregatorEvent event) { switch (event.getType()) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 2f0e3f54d3..63d1ade7c4 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -104,7 +104,7 @@ public RegisterNodeManagerResponse registerNodeManager( Resource resource = request.getResource(); LOG.info("Registering " + nodeId.toString()); try { - Assert.assertEquals(InetAddress.getLocalHost().getHostAddress() + Assert.assertEquals(InetAddress.getLocalHost().getCanonicalHostName() + ":12345", nodeId.toString()); } catch (UnknownHostException 
e) { Assert.fail(e.getMessage()); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index 2a366ec2fc..0eae0aab03 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -29,7 +29,6 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; -import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.yarn.api.ContainerManager; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -41,7 +40,6 @@ import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager; import org.apache.hadoop.yarn.server.api.ResourceTracker; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.Context; @@ -54,6 +52,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; +import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager; import org.apache.hadoop.yarn.service.Service.STATE; import org.junit.After; import org.junit.Before; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index 164039e037..449757f9ce 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -98,7 +98,7 @@ public void testLocalFileDeletionAfterUpload() throws IOException { this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, this.remoteRootLogDir.getAbsolutePath()); LogAggregationService logAggregationService = - new LogAggregationService(this.delSrvc); + new LogAggregationService(this.context, this.delSrvc); logAggregationService.init(this.conf); logAggregationService.start(); @@ -146,7 +146,7 @@ public void testNoContainerOnNode() { 
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, this.remoteRootLogDir.getAbsolutePath()); LogAggregationService logAggregationService = - new LogAggregationService(this.delSrvc); + new LogAggregationService(this.context, this.delSrvc); logAggregationService.init(this.conf); logAggregationService.start(); @@ -179,7 +179,7 @@ public void testMultipleAppsLogAggregation() throws IOException { this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, this.remoteRootLogDir.getAbsolutePath()); LogAggregationService logAggregationService = - new LogAggregationService(this.delSrvc); + new LogAggregationService(this.context, this.delSrvc); logAggregationService.init(this.conf); logAggregationService.start(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 6237f8961f..7be950f3d1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -92,15 +92,12 @@ public void init(Configuration conf) { } public void start() { - YarnRPC rpc = YarnRPC.create(getConfig()); - Configuration serverConf = new Configuration(getConfig()); - serverConf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - SchedulerSecurityInfo.class, SecurityInfo.class); + Configuration conf = getConfig(); + YarnRPC rpc = YarnRPC.create(conf); this.server = rpc.getServer(RMAdminProtocol.class, this, masterServiceAddress, - serverConf, null, - serverConf.getInt(YarnConfiguration.RM_ADMIN_CLIENT_THREAD_COUNT, + conf, null, + conf.getInt(YarnConfiguration.RM_ADMIN_CLIENT_THREAD_COUNT, YarnConfiguration.DEFAULT_RM_ADMIN_CLIENT_THREAD_COUNT)); this.server.start(); super.start(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 436ed23d6e..bcc8c08dae 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -75,7 +75,7 @@ public class ApplicationMasterService extends AbstractService implements new ConcurrentHashMap(); private final AMResponse reboot = recordFactory.newRecordInstance(AMResponse.class); private final RMContext rmContext; - + public ApplicationMasterService(RMContext rmContext, ApplicationTokenSecretManager appTokenManager, YarnScheduler scheduler) { super(ApplicationMasterService.class.getName()); @@ -98,19 +98,17 @@ public void init(Configuration conf) { @Override public void start() { - YarnRPC rpc = YarnRPC.create(getConfig()); - Configuration serverConf = new 
Configuration(getConfig()); - serverConf.setClass(YarnConfiguration.YARN_SECURITY_INFO, - SchedulerSecurityInfo.class, SecurityInfo.class); + Configuration conf = getConfig(); + YarnRPC rpc = YarnRPC.create(conf); this.server = rpc.getServer(AMRMProtocol.class, this, masterServiceAddress, - serverConf, this.appTokenManager, - serverConf.getInt(YarnConfiguration.RM_SCHEDULER_CLIENT_THREAD_COUNT, + conf, this.appTokenManager, + conf.getInt(YarnConfiguration.RM_SCHEDULER_CLIENT_THREAD_COUNT, YarnConfiguration.DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT)); this.server.start(); super.start(); } - + @Override public RegisterApplicationMasterResponse registerApplicationMaster( RegisterApplicationMasterRequest request) throws YarnRemoteException { @@ -123,7 +121,7 @@ public RegisterApplicationMasterResponse registerApplicationMaster( String message = "Application doesn't exist in cache " + applicationAttemptId; LOG.error(message); - RMAuditLogger.logFailure(this.rmContext.getRMApps().get(appID).getUser(), + RMAuditLogger.logFailure(this.rmContext.getRMApps().get(appID).getUser(), AuditConstants.REGISTER_AM, message, "ApplicationMasterService", "Error in registering application master", appID, applicationAttemptId); @@ -141,7 +139,7 @@ public RegisterApplicationMasterResponse registerApplicationMaster( .getHost(), request.getRpcPort(), request.getTrackingUrl())); RMAuditLogger.logSuccess(this.rmContext.getRMApps().get(appID).getUser(), - AuditConstants.REGISTER_AM, "ApplicationMasterService", appID, + AuditConstants.REGISTER_AM, "ApplicationMasterService", appID, applicationAttemptId); // Pick up min/max resource from scheduler... @@ -176,7 +174,7 @@ public FinishApplicationMasterResponse finishApplicationMaster( rmContext.getDispatcher().getEventHandler().handle( new RMAppAttemptUnregistrationEvent(applicationAttemptId, request - .getTrackingUrl(), request.getFinalState(), request + .getTrackingUrl(), request.getFinalApplicationStatus(), request .getDiagnostics())); FinishApplicationMasterResponse response = recordFactory @@ -225,7 +223,7 @@ public AllocateResponse allocate(AllocateRequest request) List release = request.getReleaseList(); // Send new requests to appAttempt. - Allocation allocation = + Allocation allocation = this.rScheduler.allocate(appAttemptId, ask, release); RMApp app = this.rmContext.getRMApps().get(appAttemptId.getApplicationId()); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 2cf1900098..01eab2111b 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -125,16 +125,13 @@ public void init(Configuration conf) { public void start() { // All the clients to appsManager are supposed to be authenticated via // Kerberos if security is enabled, so no secretManager. 
- YarnRPC rpc = YarnRPC.create(getConfig()); - Configuration clientServerConf = new Configuration(getConfig()); - clientServerConf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - ClientRMSecurityInfo.class, SecurityInfo.class); + Configuration conf = getConfig(); + YarnRPC rpc = YarnRPC.create(conf); this.server = rpc.getServer(ClientRMProtocol.class, this, clientBindAddress, - clientServerConf, null, - clientServerConf.getInt(YarnConfiguration.RM_CLIENT_THREAD_COUNT, + conf, null, + conf.getInt(YarnConfiguration.RM_CLIENT_THREAD_COUNT, YarnConfiguration.DEFAULT_RM_CLIENT_THREAD_COUNT)); this.server.start(); super.start(); @@ -351,7 +348,7 @@ private NodeReport createNodeReports(RMNode rmNode) { report.setNodeHealthStatus(rmNode.getNodeHealthStatus()); org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport schedulerNodeReport = scheduler .getNodeReport(rmNode.getNodeID()); - report.setUsed(schedulerNodeReport.getUsedResources()); + report.setUsed(schedulerNodeReport.getUsedResource()); report.setNumContainers(schedulerNodeReport.getNumContainers()); return report; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index d8c2d2c543..a80736d4bd 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -109,15 +109,12 @@ public synchronized void start() { super.start(); // ResourceTrackerServer authenticates NodeManager via Kerberos if // security is enabled, so no secretManager. 
- YarnRPC rpc = YarnRPC.create(getConfig()); - Configuration rtServerConf = new Configuration(getConfig()); - rtServerConf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - RMNMSecurityInfoClass.class, SecurityInfo.class); + Configuration conf = getConfig(); + YarnRPC rpc = YarnRPC.create(conf); this.server = rpc.getServer(ResourceTracker.class, this, resourceTrackerAddress, - rtServerConf, null, - rtServerConf.getInt(YarnConfiguration.RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT, + conf, null, + conf.getInt(YarnConfiguration.RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT, YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT)); this.server.start(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java index 337f481689..07aac74f79 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java @@ -92,12 +92,9 @@ public AMLauncher(RMContext rmContext, RMAppAttempt application, ApplicationTokenSecretManager applicationTokenSecretManager, ClientToAMSecretManager clientToAMSecretManager, Configuration conf) { this.application = application; - this.conf = new Configuration(conf); // Just not to touch the sec-info class + this.conf = conf; this.applicationTokenSecretManager = applicationTokenSecretManager; this.clientToAMSecretManager = clientToAMSecretManager; - this.conf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - ContainerManagerSecurityInfo.class, SecurityInfo.class); this.eventType = eventType; this.handler = rmContext.getDispatcher().getEventHandler(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java index 6e63e2248d..0ea9202fcb 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java @@ -20,6 +20,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; @@ -28,8 +29,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; /** - * The read interface to an Application in the ResourceManager. Take a - * look at {@link RMAppImpl} for its implementation. 
This interface + * The read interface to an Application in the ResourceManager. Take a + * look at {@link RMAppImpl} for its implementation. This interface * exposes methods to access various updates in application status/report. */ public interface RMApp extends EventHandler { @@ -68,7 +69,7 @@ public interface RMApp extends EventHandler { RMAppAttempt getRMAppAttempt(ApplicationAttemptId appAttemptId); /** - * Each Application is submitted to a queue decided by {@link + * Each Application is submitted to a queue decided by {@link * ApplicationSubmissionContext#setQueue(String)}. * This method returns the queue to which an application was submitted. * @return the queue to which the application was submitted to. @@ -76,7 +77,7 @@ public interface RMApp extends EventHandler { String getQueue(); /** - * The name of the application as set in {@link + * The name of the application as set in {@link * ApplicationSubmissionContext#setApplicationName(String)}. * @return the name of the application. */ @@ -85,7 +86,7 @@ public interface RMApp extends EventHandler { /** * {@link RMApp} can have multiple application attempts {@link RMAppAttempt}. * This method returns the current {@link RMAppAttempt}. - * @return the current {@link RMAppAttempt} + * @return the current {@link RMAppAttempt} */ RMAppAttempt getCurrentAppAttempt(); @@ -96,7 +97,7 @@ public interface RMApp extends EventHandler { ApplicationReport createAndGetApplicationReport(); /** - * Application level metadata is stored in {@link ApplicationStore} whicn + * Application level metadata is stored in {@link ApplicationStore} whicn * can persist the information. * @return the {@link ApplicationStore} for this {@link RMApp}. */ @@ -125,12 +126,12 @@ public interface RMApp extends EventHandler { * @return the diagnostics information for the application master. */ StringBuilder getDiagnostics(); - + /** - * The final state of the AM when unregistering as in - * {@link FinishApplicationMasterRequest#setFinalState(String)}. - * @return the final state of the AM as set in - * {@link FinishApplicationMasterRequest#setFinalState(String)}. + * The final finish state of the AM when unregistering as in + * {@link FinishApplicationMasterRequest#setFinishApplicationStatus(FinalApplicationStatus)}. + * @return the final finish state of the AM as set in + * {@link FinishApplicationMasterRequest#setFinishApplicationStatus(FinalApplicationStatus)}. 
*/ - String getAMFinalState(); + FinalApplicationStatus getFinalApplicationStatus(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 94d04a8d12..8cdccfdddd 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -32,9 +32,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -94,7 +95,7 @@ public class RMAppImpl implements RMApp { private static final StateMachineFactory stateMachineFactory + RMAppEvent> stateMachineFactory = new StateMachineFactory { @@ -49,7 +50,7 @@ public interface RMAppAttempt extends EventHandler { * @return the state {@link RMAppAttemptState} of this {@link RMAppAttempt} */ RMAppAttemptState getAppAttemptState(); - + /** * The host on which the {@link RMAppAttempt} is running/ran on. * @return the host on which the {@link RMAppAttempt} ran/is running on. @@ -88,11 +89,12 @@ public interface RMAppAttempt extends EventHandler { float getProgress(); /** - * The final state set by the AM. - * @return the final state that is set by the AM when unregistering itself. + * The final status set by the AM. + * @return the final status that is set by the AM when unregistering itself. Can return a null + * if the AM has not unregistered itself. */ - String getAMFinalState(); - + FinalApplicationStatus getFinalApplicationStatus(); + /** * Nodes on which the containers for this {@link RMAppAttempt} ran. * @return the set of nodes that ran any containers from this {@link RMAppAttempt} @@ -100,16 +102,16 @@ public interface RMAppAttempt extends EventHandler { Set getRanNodes(); /** - * Return a list of the last set of finished containers, resetting the + * Return a list of the last set of finished containers, resetting the * finished containers to empty. * @return the list of just finished containers, re setting the finished containers. */ List pullJustFinishedContainers(); /** - * Return the list of last set of finished containers. This does not reset the + * Return the list of last set of finished containers. This does not reset the * finished containers. - * @return the list of just finished contianers, this does not reset the + * @return the list of just finished contianers, this does not reset the * finished containers. 
*/ List getJustFinishedContainers(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 7f8ff82d6a..81aae50201 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -31,6 +31,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; @@ -99,9 +100,9 @@ public class RMAppAttemptImpl implements RMAppAttempt { private final ApplicationSubmissionContext submissionContext; //nodes on while this attempt's containers ran - private final Set ranNodes = + private final Set ranNodes = new HashSet(); - private final List justFinishedContainers = + private final List justFinishedContainers = new ArrayList(); private Container masterContainer; @@ -109,7 +110,9 @@ public class RMAppAttemptImpl implements RMAppAttempt { private String host = "N/A"; private int rpcPort; private String trackingUrl = "N/A"; - private String finalState = "N/A"; + // Set to null initially. 
Will eventually get set + // if an RMAppAttemptUnregistrationEvent occurs + private FinalApplicationStatus finalStatus = null; private final StringBuilder diagnostics = new StringBuilder(); private static final StateMachineFactory EMPTY_CONTAINER_RELEASE_LIST = + private static final List EMPTY_CONTAINER_RELEASE_LIST = new ArrayList(); - private static final List EMPTY_CONTAINER_REQUEST_LIST = + private static final List EMPTY_CONTAINER_REQUEST_LIST = new ArrayList(); private static final class ScheduleTransition extends BaseTransition { @@ -540,23 +543,23 @@ public void transition(RMAppAttemptImpl appAttempt, switch (finalAttemptState) { case FINISHED: { - appEvent = + appEvent = new RMAppEvent(applicationId, RMAppEventType.ATTEMPT_FINISHED); } break; case KILLED: { - appEvent = - new RMAppFailedAttemptEvent(applicationId, - RMAppEventType.ATTEMPT_KILLED, + appEvent = + new RMAppFailedAttemptEvent(applicationId, + RMAppEventType.ATTEMPT_KILLED, "Application killed by user."); } break; case FAILED: { - appEvent = - new RMAppFailedAttemptEvent(applicationId, - RMAppEventType.ATTEMPT_FAILED, + appEvent = + new RMAppFailedAttemptEvent(applicationId, + RMAppEventType.ATTEMPT_FAILED, appAttempt.getDiagnostics()); } break; @@ -566,7 +569,7 @@ public void transition(RMAppAttemptImpl appAttempt, } break; } - + appAttempt.eventHandler.handle(appEvent); appAttempt.eventHandler.handle(new AppRemovedSchedulerEvent(appAttempt .getAppAttemptId(), finalAttemptState)); @@ -657,7 +660,7 @@ public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptContainerFinishedEvent finishEvent = ((RMAppAttemptContainerFinishedEvent)event); - + // UnRegister from AMLivelinessMonitor appAttempt.rmContext.getAMLivelinessMonitor().unregister( appAttempt.getAppAttemptId()); @@ -666,7 +669,7 @@ public void transition(RMAppAttemptImpl appAttempt, ContainerStatus status = finishEvent.getContainerStatus(); appAttempt.diagnostics.append("AM Container for " + appAttempt.getAppAttemptId() + " exited with " + - " exitCode: " + status.getExitStatus() + + " exitCode: " + status.getExitStatus() + " due to: " + status.getDiagnostics() + "." + "Failing this attempt."); @@ -730,10 +733,9 @@ public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptUnregistrationEvent unregisterEvent = (RMAppAttemptUnregistrationEvent) event; - unregisterEvent.getFinalState(); appAttempt.diagnostics.append(unregisterEvent.getDiagnostics()); appAttempt.trackingUrl = unregisterEvent.getTrackingUrl(); - appAttempt.finalState = unregisterEvent.getFinalState(); + appAttempt.finalStatus = unregisterEvent.getFinalApplicationStatus(); // Tell the app and the scheduler super.transition(appAttempt, event); @@ -761,7 +763,7 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, RMAppAttemptContainerFinishedEvent containerFinishedEvent = (RMAppAttemptContainerFinishedEvent) event; - ContainerStatus containerStatus = + ContainerStatus containerStatus = containerFinishedEvent.getContainerStatus(); // Is this container the AmContainer? If the finished container is same as @@ -771,7 +773,7 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, // Setup diagnostic message appAttempt.diagnostics.append("AM Container for " + appAttempt.getAppAttemptId() + " exited with " + - " exitCode: " + containerStatus.getExitStatus() + + " exitCode: " + containerStatus.getExitStatus() + " due to: " + containerStatus.getDiagnostics() + "." 
+ "Failing this attempt."); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptUnregistrationEvent.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptUnregistrationEvent.java index 3511a03eed..5ea461121c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptUnregistrationEvent.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptUnregistrationEvent.java @@ -19,20 +19,21 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; public class RMAppAttemptUnregistrationEvent extends RMAppAttemptEvent { private final String trackingUrl; - private final String finalState; + private final FinalApplicationStatus finalStatus; private final String diagnostics; public RMAppAttemptUnregistrationEvent(ApplicationAttemptId appAttemptId, - String trackingUrl, String finalState, String diagnostics) { + String trackingUrl, FinalApplicationStatus finalStatus, String diagnostics) { super(appAttemptId, RMAppAttemptEventType.UNREGISTERED); this.trackingUrl = trackingUrl; - this.finalState = finalState; + this.finalStatus = finalStatus; this.diagnostics = diagnostics; } @@ -40,12 +41,12 @@ public String getTrackingUrl() { return this.trackingUrl; } - public String getFinalState() { - return this.finalState; + public FinalApplicationStatus getFinalApplicationStatus() { + return this.finalStatus; } public String getDiagnostics() { return this.diagnostics; } -} \ No newline at end of file +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 6928cdb19d..814e1b0796 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -110,7 +110,7 @@ public static QueueMetrics forQueue(MetricsSystem ms, String queueName, "Metrics for queue: " + queueName, metrics); } - synchronized QueueMetrics getUserMetrics(String userName) { + public synchronized QueueMetrics getUserMetrics(String userName) { if (users == null) { return null; } @@ -280,27 +280,27 @@ public void unreserveResource(String user, Resource res) { parent.unreserveResource(user, res); } } - + public int getAppsSubmitted() { return 
appsSubmitted.value(); } - + public int getAppsRunning() { return appsRunning.value(); } - + public int getAppsPending() { return appsPending.value(); } - + public int getAppsCompleted() { return appsCompleted.value(); } - + public int getAppsKilled() { return appsKilled.value(); } - + public int getAppsFailed() { return appsFailed.value(); } @@ -312,7 +312,7 @@ public int getAllocatedGB() { public int getAllocatedContainers() { return allocatedContainers.value(); } - + public int getAvailableGB() { return availableGB.value(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java index 10913e0999..5ee6d46e95 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java @@ -136,6 +136,10 @@ public Resource getResource(Priority priority) { return this.appSchedulingInfo.getResource(priority); } + /** + * Is this application pending? + * @return true if it is else false. + */ public boolean isPending() { return this.appSchedulingInfo.isPending(); } @@ -144,6 +148,10 @@ public String getQueueName() { return this.appSchedulingInfo.getQueueName(); } + /** + * Get the list of live containers + * @return All of the live containers + */ public synchronized Collection getLiveContainers() { return new ArrayList(liveContainers.values()); } @@ -419,7 +427,11 @@ public synchronized float getLocalityWaitFactor( return Math.min(((float)requiredResources / clusterNodes), 1.0f); } - public synchronized List getAllReservedContainers() { + /** + * Get the list of reserved containers + * @return All of the reserved containers. + */ + public synchronized List getReservedContainers() { List reservedContainers = new ArrayList(); for (Map.Entry> e : this.reservedContainers.entrySet()) { @@ -447,5 +459,4 @@ public synchronized Resource getHeadroom() { public Queue getQueue() { return queue; } - } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppReport.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppReport.java new file mode 100644 index 0000000000..c5ce4af9db --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppReport.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler; + +import java.util.Collection; + +import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; + +/** + * Represents an application attempt, and the resources that the attempt is + * using. + */ +@Evolving +@LimitedPrivate("yarn") +public class SchedulerAppReport { + + private final Collection live; + private final Collection reserved; + private final boolean pending; + + public SchedulerAppReport(SchedulerApp app) { + this.live = app.getLiveContainers(); + this.reserved = app.getReservedContainers(); + this.pending = app.isPending(); + } + + /** + * Get the list of live containers + * @return All of the live containers + */ + public Collection getLiveContainers() { + return live; + } + + /** + * Get the list of reserved containers + * @return All of the reserved containers. + */ + public Collection getReservedContainers() { + return reserved; + } + + /** + * Is this application pending? + * @return true if it is else false. + */ + public boolean isPending() { + return pending; + } +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNodeReport.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNodeReport.java index 61b8d608a3..b1f6c64842 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNodeReport.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNodeReport.java @@ -28,19 +28,34 @@ @Private @Stable public class SchedulerNodeReport { - private final Resource usedResources; - private final int numContainers; + private final Resource used; + private final Resource avail; + private final int num; - public SchedulerNodeReport(Resource used, int numContainers) { - this.usedResources = used; - this.numContainers = numContainers; + public SchedulerNodeReport(SchedulerNode node) { + this.used = node.getUsedResource(); + this.avail = node.getAvailableResource(); + this.num = node.getNumContainers(); + } + + /** + * @return the amount of resources currently used by the node. + */ + public Resource getUsedResource() { + return used; } - public Resource getUsedResources() { - return usedResources; + /** + * @return the amount of resources currently available on the node + */ + public Resource getAvailableResource() { + return avail; } + /** + * @return the number of containers currently running on this node. 
+ */ public int getNumContainers() { - return numContainers; + return num; } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java index 9f6a2f7837..7f7e994e7b 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java @@ -21,8 +21,9 @@ import java.io.IOException; import java.util.List; -import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -57,7 +58,6 @@ public QueueInfo getQueueInfo(String queueName, boolean includeChildQueues, /** * Get acls for queues for current user. * @return acls for queues for current user - * @throws IOException */ @Public @Stable @@ -99,28 +99,27 @@ public QueueInfo getQueueInfo(String queueName, boolean includeChildQueues, /** * Get node resource usage report. * @param nodeId - * @return the {@link SchedulerNodeReport} for the node + * @return the {@link SchedulerNodeReport} for the node or null + * if nodeId does not point to a defined node. */ - @Private + @LimitedPrivate("yarn") @Stable public SchedulerNodeReport getNodeReport(NodeId nodeId); /** - * Get used resources on the node - * @param nodeId node - * @return used resources on the node + * Get the Scheduler app for a given app attempt Id. + * @param appAttemptId the id of the application attempt + * @return SchedulerApp for this given attempt. */ - @Private + @LimitedPrivate("yarn") @Stable - Resource getUsedResource(NodeId nodeId); + SchedulerAppReport getSchedulerAppInfo(ApplicationAttemptId appAttemptId); /** - * Get available resources on the node - * @param nodeId node - * @return available resources on the node + * Get the root queue for the scheduler. + * @return the root queue for the scheduler. 
*/ - @Private - @Stable - Resource getAvailableResource(NodeId nodeId); - + @LimitedPrivate("yarn") + @Evolving + QueueMetrics getRootQueueMetrics(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index abbe0365bb..f0c38db2fa 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -58,8 +58,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; @@ -128,10 +130,15 @@ public int compare(SchedulerApp a1, SchedulerApp a2) { public CapacityScheduler() {} + @Override + public QueueMetrics getRootQueueMetrics() { + return root.getMetrics(); + } + public CSQueue getRootQueue() { return root; } - + @Override public CapacitySchedulerConfiguration getConfiguration() { return conf; @@ -152,14 +159,6 @@ public Resource getMaximumResourceCapability() { return maximumAllocation; } - public synchronized Resource getUsedResource(NodeId nodeId) { - return nodes.get(nodeId).getUsedResource(); - } - - public synchronized Resource getAvailableResource(NodeId nodeId) { - return nodes.get(nodeId).getAvailableResource(); - } - public synchronized int getNumClusterNodes() { return numNodeManagers; } @@ -401,7 +400,7 @@ private synchronized void doneApplication( } // Release all reserved containers - for (RMContainer rmContainer : application.getAllReservedContainers()) { + for (RMContainer rmContainer : application.getReservedContainers()) { completedContainer(rmContainer, SchedulerUtils.createAbnormalContainerStatus( rmContainer.getContainerId(), @@ -465,21 +464,25 @@ public Allocation allocate(ApplicationAttemptId applicationAttemptId, if (!ask.isEmpty()) { - LOG.info("DEBUG --- allocate: pre-update" + + if(LOG.isDebugEnabled()) { + LOG.debug("allocate: pre-update" + " applicationAttemptId=" + applicationAttemptId + " application=" + application); + } application.showRequests(); // Update application requests application.updateResourceRequests(ask); - LOG.info("DEBUG --- allocate: post-update"); + LOG.debug("allocate: post-update"); application.showRequests(); } - LOG.info("DEBUG --- allocate:" + + if(LOG.isDebugEnabled()) { + LOG.debug("allocate:" + " applicationAttemptId=" + 
applicationAttemptId + " #ask=" + ask.size()); + } return new Allocation( application.pullNewlyAllocatedContainers(), @@ -548,14 +551,16 @@ private synchronized void nodeUpdate(RMNode nm, // Process completed containers for (ContainerStatus completedContainer : completedContainers) { ContainerId containerId = completedContainer.getContainerId(); - LOG.info("DEBUG --- Container FINISHED: " + containerId); + LOG.debug("Container FINISHED: " + containerId); completedContainer(getRMContainer(containerId), completedContainer, RMContainerEventType.FINISHED); } // Now node data structures are upto date and ready for scheduling. - LOG.info("DEBUG -- Node being looked for scheduling " + nm + if(LOG.isDebugEnabled()) { + LOG.debug("Node being looked for scheduling " + nm + " availableResource: " + node.getAvailableResource()); + } // Assign new containers... // 1. Check for reserved applications @@ -733,6 +738,13 @@ SchedulerApp getApplication(ApplicationAttemptId applicationAttemptId) { return applications.get(applicationAttemptId); } + @Override + public SchedulerAppReport getSchedulerAppInfo( + ApplicationAttemptId applicationAttemptId) { + SchedulerApp app = getApplication(applicationAttemptId); + return app == null ? null : new SchedulerAppReport(app); + } + @Lock(Lock.NoLock.class) SchedulerNode getNode(NodeId nodeId) { return nodes.get(nodeId); @@ -764,8 +776,7 @@ public void recover(RMState state) throws Exception { @Override public SchedulerNodeReport getNodeReport(NodeId nodeId) { SchedulerNode node = getNode(nodeId); - return new SchedulerNodeReport( - node.getUsedResource(), node.getNumContainers()); + return node == null ? null : new SchedulerNodeReport(node); } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index e67d371ee6..db597447a8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; import java.io.IOException; +import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; @@ -34,6 +35,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; @@ -41,6 +43,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ContainerToken; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueInfo; 
@@ -181,9 +184,10 @@ public LeafQueue(CapacitySchedulerContext cs, maxActiveApplications, maxActiveApplicationsPerUser, state, acls); - LOG.info("DEBUG --- LeafQueue:" + - " name=" + queueName + - ", fullname=" + getQueuePath()); + if(LOG.isDebugEnabled()) { + LOG.debug("LeafQueue:" + " name=" + queueName + + ", fullname=" + getQueuePath()); + } this.pendingApplications = new TreeSet(applicationComparator); @@ -670,9 +674,10 @@ private synchronized SchedulerApp getApplication( public synchronized Resource assignContainers(Resource clusterResource, SchedulerNode node) { - LOG.info("DEBUG --- assignContainers:" + - " node=" + node.getHostName() + - " #applications=" + activeApplications.size()); + if(LOG.isDebugEnabled()) { + LOG.debug("assignContainers: node=" + node.getHostName() + + " #applications=" + activeApplications.size()); + } // Check for reserved resources RMContainer reservedContainer = node.getReservedContainer(); @@ -686,8 +691,10 @@ private synchronized SchedulerApp getApplication( // Try to assign containers to applications in order for (SchedulerApp application : activeApplications) { - LOG.info("DEBUG --- pre-assignContainers for application " - + application.getApplicationId()); + if(LOG.isDebugEnabled()) { + LOG.debug("pre-assignContainers for application " + + application.getApplicationId()); + } application.showRequests(); synchronized (application) { @@ -747,8 +754,10 @@ private synchronized SchedulerApp getApplication( } } - LOG.info("DEBUG --- post-assignContainers for application " + if(LOG.isDebugEnabled()) { + LOG.debug("post-assignContainers for application " + application.getApplicationId()); + } application.showRequests(); } @@ -1052,6 +1061,7 @@ private Container getContainer(RMContainer rmContainer, createContainer(application, node, capability, priority); } + public Container createContainer(SchedulerApp application, SchedulerNode node, Resource capability, Priority priority) { Container container = @@ -1065,9 +1075,9 @@ public Container createContainer(SchedulerApp application, SchedulerNode node, if (UserGroupInformation.isSecurityEnabled()) { ContainerToken containerToken = this.recordFactory.newRecordInstance(ContainerToken.class); - ContainerTokenIdentifier tokenidentifier = - new ContainerTokenIdentifier(container.getId(), - container.getNodeId().toString(), container.getResource()); + NodeId nodeId = container.getNodeId(); + ContainerTokenIdentifier tokenidentifier = new ContainerTokenIdentifier( + container.getId(), nodeId.toString(), container.getResource()); containerToken.setIdentifier( ByteBuffer.wrap(tokenidentifier.getBytes())); containerToken.setKind(ContainerTokenIdentifier.KIND.toString()); @@ -1075,7 +1085,11 @@ public Container createContainer(SchedulerApp application, SchedulerNode node, ByteBuffer.wrap( containerTokenSecretManager.createPassword(tokenidentifier)) ); - containerToken.setService(container.getNodeId().toString()); + // RPC layer client expects ip:port as service for tokens + InetSocketAddress addr = NetUtils.createSocketAddr(nodeId.getHost(), + nodeId.getPort()); + containerToken.setService(addr.getAddress().getHostAddress() + ":" + + addr.getPort()); container.setContainerToken(containerToken); } @@ -1086,11 +1100,10 @@ private Resource assignContainer(Resource clusterResource, SchedulerNode node, SchedulerApp application, Priority priority, ResourceRequest request, NodeType type, RMContainer rmContainer) { if (LOG.isDebugEnabled()) { - LOG.info("DEBUG --- assignContainers:" + - " node=" + node.getHostName() + - " 
application=" + application.getApplicationId().getId() + - " priority=" + priority.getPriority() + - " request=" + request + " type=" + type); + LOG.debug("assignContainers: node=" + node.getHostName() + + " application=" + application.getApplicationId().getId() + + " priority=" + priority.getPriority() + + " request=" + request + " type=" + type); } Resource capability = request.getCapability(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index 6aa282798c..dc363a210f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -141,7 +141,7 @@ public ParentQueue(CapacitySchedulerContext cs, maximumCapacity, absoluteMaxCapacity, state, acls); this.queueComparator = comparator; - this.childQueues = new TreeSet(comparator); + this.childQueues = new TreeSet(queueComparator); LOG.info("Initialized parent-queue " + queueName + " name=" + queueName + @@ -197,7 +197,9 @@ void setChildQueues(Collection childQueues) { this.childQueues.clear(); this.childQueues.addAll(childQueues); - LOG.info("DEBUG --- setChildQueues: " + getChildQueuesToPrint()); + if (LOG.isDebugEnabled()) { + LOG.debug("setChildQueues: " + getChildQueuesToPrint()); + } } @Override @@ -497,8 +499,10 @@ public synchronized Resource assignContainers( Resource assigned = Resources.createResource(0); while (canAssign(node)) { - LOG.info("DEBUG --- Trying to assign containers to child-queue of " + - getQueueName()); + if (LOG.isDebugEnabled()) { + LOG.debug("Trying to assign containers to child-queue of " + + getQueueName()); + } // Are we over maximum-capacity for this queue? 
if (!assignToQueue(clusterResource)) { @@ -527,11 +531,12 @@ public synchronized Resource assignContainers( break; } - LOG.info("DEBUG ---" + - " parentQ=" + getQueueName() + - " assignedSoFarInThisIteration=" + assigned + - " utilization=" + getUtilization()); - + if (LOG.isDebugEnabled()) { + LOG.debug("ParentQ=" + getQueueName() + + " assignedSoFarInThisIteration=" + assigned + + " utilization=" + getUtilization()); + } + // Do not assign more than one container if this isn't the root queue if (!rootQueue) { break; @@ -571,13 +576,15 @@ synchronized Resource assignContainersToChildQueues(Resource cluster, // Try to assign to most 'under-served' sub-queue for (Iterator iter=childQueues.iterator(); iter.hasNext();) { CSQueue childQueue = iter.next(); - LOG.info("DEBUG --- Trying to assign to" + - " queue: " + childQueue.getQueuePath() + - " stats: " + childQueue); + if(LOG.isDebugEnabled()) { + LOG.debug("Trying to assign to queue: " + childQueue.getQueuePath() + + " stats: " + childQueue); + } assigned = childQueue.assignContainers(cluster, node); - LOG.info("DEBUG --- Assignedto" + - " queue: " + childQueue.getQueuePath() + - " stats: " + childQueue + " --> " + assigned.getMemory()); + if(LOG.isDebugEnabled()) { + LOG.debug("Assignedto queue: " + childQueue.getQueuePath() + + " stats: " + childQueue + " --> " + assigned.getMemory()); + } // If we do assign, remove the queue and re-insert in-order to re-sort if (Resources.greaterThan(assigned, Resources.none())) { @@ -602,8 +609,10 @@ String getChildQueuesToPrint() { return sb.toString(); } void printChildQueues() { - LOG.info("DEBUG --- printChildQueues - queue: " + getQueuePath() + - " child-queues: " + getChildQueuesToPrint()); + if (LOG.isDebugEnabled()) { + LOG.debug("printChildQueues - queue: " + getQueuePath() + + " child-queues: " + getChildQueuesToPrint()); + } } @Override diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 7a90c5b6fa..1060762406 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo; import java.io.IOException; +import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -35,6 +36,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.Lock; @@ -47,6 +49,7 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueInfo; +import org.apache.hadoop.yarn.api.records.QueueState; import 
org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -70,6 +73,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; @@ -82,7 +86,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager; import org.apache.hadoop.yarn.util.BuilderUtils; -import org.apache.hadoop.yarn.api.records.QueueState; @LimitedPrivate("yarn") @Evolving @@ -170,14 +173,6 @@ public List getQueueUserAclInfo( } }; - public synchronized Resource getUsedResource(NodeId nodeId) { - return getNode(nodeId).getUsedResource(); - } - - public synchronized Resource getAvailableResource(NodeId nodeId) { - return getNode(nodeId).getAvailableResource(); - } - @Override public Resource getMinimumResourceCapability() { return minimumAllocation; @@ -285,6 +280,13 @@ private SchedulerApp getApplication( return applications.get(applicationAttemptId); } + @Override + public SchedulerAppReport getSchedulerAppInfo( + ApplicationAttemptId applicationAttemptId) { + SchedulerApp app = getApplication(applicationAttemptId); + return app == null ? null : new SchedulerAppReport(app); + } + private SchedulerNode getNode(NodeId nodeId) { return nodes.get(nodeId); } @@ -535,16 +537,21 @@ private int assignContainer(SchedulerNode node, SchedulerApp application, if (UserGroupInformation.isSecurityEnabled()) { ContainerToken containerToken = recordFactory.newRecordInstance(ContainerToken.class); + NodeId nodeId = container.getNodeId(); ContainerTokenIdentifier tokenidentifier = new ContainerTokenIdentifier(container.getId(), - container.getNodeId().toString(), container.getResource()); + nodeId.toString(), container.getResource()); containerToken.setIdentifier( ByteBuffer.wrap(tokenidentifier.getBytes())); containerToken.setKind(ContainerTokenIdentifier.KIND.toString()); containerToken.setPassword( ByteBuffer.wrap(containerTokenSecretManager .createPassword(tokenidentifier))); - containerToken.setService(container.getNodeId().toString()); + // RPC layer client expects ip:port as service for tokens + InetSocketAddress addr = NetUtils.createSocketAddr( + nodeId.getHost(), nodeId.getPort()); + containerToken.setService(addr.getAddress().getHostAddress() + ":" + + addr.getPort()); container.setContainerToken(containerToken); } @@ -580,7 +587,7 @@ private synchronized void nodeUpdate(RMNode rmNode, // Process completed containers for (ContainerStatus completedContainer : completedContainers) { ContainerId containerId = completedContainer.getContainerId(); - LOG.info("DEBUG --- Container FINISHED: " + containerId); + LOG.debug("Container FINISHED: " + containerId); containerCompleted(getRMContainer(containerId), completedContainer, RMContainerEventType.FINISHED); } @@ -703,6 +710,9 @@ private synchronized void containerCompleted(RMContainer rmContainer, // Inform the node node.releaseContainer(container); + + // Update total usage + 
Resources.subtractFrom(usedResource, container.getResource()); LOG.info("Application " + applicationAttemptId + " released container " + container.getId() + @@ -762,8 +772,7 @@ public void recover(RMState state) { @Override public synchronized SchedulerNodeReport getNodeReport(NodeId nodeId) { SchedulerNode node = getNode(nodeId); - return new SchedulerNodeReport( - node.getUsedResource(), node.getNumContainers()); + return node == null ? null : new SchedulerNodeReport(node); } private RMContainer getRMContainer(ContainerId containerId) { @@ -772,4 +781,9 @@ private RMContainer getRMContainer(ContainerId containerId) { return (application == null) ? null : application.getRMContainer(containerId); } + @Override + public QueueMetrics getRootQueueMetrics() { + return DEFAULT_QUEUE.getMetrics(); + } + } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/tools/RMAdmin.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/tools/RMAdmin.java index 3c367a6a6b..84e48f039f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/tools/RMAdmin.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/tools/RMAdmin.java @@ -23,7 +23,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.SecurityInfo; @@ -149,11 +148,6 @@ private RMAdminProtocol createAdminProtocol() throws IOException { YarnConfiguration.RM_ADMIN_ADDRESS); final YarnRPC rpc = YarnRPC.create(conf); - if (UserGroupInformation.isSecurityEnabled()) { - conf.setClass(YarnConfiguration.YARN_SECURITY_INFO, - AdminSecurityInfo.class, SecurityInfo.class); - } - RMAdminProtocol adminProtocol = getUGI(conf).doAs(new PrivilegedAction() { @Override diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java new file mode 100644 index 0000000000..aaee9b1bf0 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java @@ -0,0 +1,55 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.resourcemanager.webapp; + +import org.apache.hadoop.util.VersionInfo; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.util.Times; +import org.apache.hadoop.yarn.util.YarnVersionInfo; +import org.apache.hadoop.yarn.webapp.view.HtmlBlock; +import org.apache.hadoop.yarn.webapp.view.InfoBlock; + +import com.google.inject.Inject; + +public class AboutBlock extends HtmlBlock { + final ResourceManager rm; + + @Inject + AboutBlock(ResourceManager rm, ViewContext ctx) { + super(ctx); + this.rm = rm; + } + + @Override + protected void render(Block html) { + html._(MetricsOverviewTable.class); + long ts = ResourceManager.clusterTimeStamp; + ResourceManager rm = getInstance(ResourceManager.class); + info("Cluster overview"). + _("Cluster ID:", ts). + _("ResourceManager state:", rm.getServiceState()). + _("ResourceManager started on:", Times.format(ts)). + _("ResourceManager version:", YarnVersionInfo.getBuildVersion() + + " on " + YarnVersionInfo.getDate()). + _("Hadoop version:", VersionInfo.getBuildVersion() + + " on " + VersionInfo.getDate()); + html._(InfoBlock.class); + } + +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/InfoPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutPage.java similarity index 89% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/InfoPage.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutPage.java index 687c249051..ef0fdcf199 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/InfoPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutPage.java @@ -19,15 +19,14 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; import org.apache.hadoop.yarn.webapp.SubView; -import org.apache.hadoop.yarn.webapp.view.InfoBlock; -public class InfoPage extends RmView { +public class AboutPage extends RmView { @Override protected void preHead(Page.HTML<_> html) { commonPreHead(html); } @Override protected Class content() { - return InfoBlock.class; + return AboutBlock.class; } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java index 
8d2d0957db..41ed833a99 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java @@ -50,6 +50,7 @@ class AppsBlock extends HtmlBlock { th(".name", "Name"). th(".queue", "Queue"). th(".state", "State"). + th(".finalstatus", "FinalStatus"). th(".progress", "Progress"). th(".ui", "Tracking UI"). th(".note", "Note")._()._(). @@ -60,7 +61,7 @@ class AppsBlock extends HtmlBlock { String trackingUrl = app.getTrackingUrl(); String ui = trackingUrl == null || trackingUrl.isEmpty() ? "UNASSIGNED" : (app.getFinishTime() == 0 ? - "ApplicationMaster URL" : "JobHistory URL"); + "ApplicationMaster" : "History"); String percent = String.format("%.1f", app.getProgress() * 100); tbody. tr(). @@ -70,8 +71,8 @@ class AppsBlock extends HtmlBlock { td(app.getUser().toString()). td(app.getName().toString()). td(app.getQueue().toString()). - td(app.getState() == RMAppState.FINISHED ? app.getAMFinalState() : - app.getState().toString()). + td(app.getState().toString()). + td(app.getFinalApplicationStatus().toString()). td(). br().$title(percent)._(). // for sorting div(_PROGRESSBAR). @@ -79,7 +80,8 @@ class AppsBlock extends HtmlBlock { div(_PROGRESSBAR_VALUE). $style(join("width:", percent, '%'))._()._()._(). td(). - a(trackingUrl == null ? "#" : join("http://", trackingUrl), ui)._(). + a(trackingUrl == null || trackingUrl.isEmpty() || "N/A".equalsIgnoreCase(trackingUrl) ? + "#" : join("http://", trackingUrl), ui)._(). td(app.getDiagnostics().toString())._(); if (list.rendering != Render.HTML && ++i >= 20) break; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlockWithMetrics.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlockWithMetrics.java new file mode 100644 index 0000000000..6d461f659c --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlockWithMetrics.java @@ -0,0 +1,31 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.resourcemanager.webapp; + +import org.apache.hadoop.yarn.webapp.view.HtmlBlock; + +/** + * Renders a block for the applications with metrics information. 
+ */ +class AppsBlockWithMetrics extends HtmlBlock { + @Override public void render(Block html) { + html._(MetricsOverviewTable.class); + html._(AppsBlock.class); + } +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java index ba6b0ea231..def6011d4f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java @@ -62,8 +62,8 @@ void toDataTableArrays(PrintWriter out) { } String appID = app.getApplicationId().toString(); String trackingUrl = app.getTrackingUrl(); - String ui = trackingUrl == null ? "UNASSIGNED" : - (app.getFinishTime() == 0 ? "ApplicationMaster" : "JobHistory"); + String ui = trackingUrl == null || trackingUrl.isEmpty() || "N/A".equalsIgnoreCase(trackingUrl) ? + "UNASSIGNED" : (app.getFinishTime() == 0 ? "ApplicationMaster" : "JobHistory"); out.append("[\""); appendSortable(out, app.getApplicationId().getId()); appendLink(out, appID, rc.prefix(), "app", appID).append(_SEP). @@ -73,7 +73,8 @@ void toDataTableArrays(PrintWriter out) { append(app.getState().toString()).append(_SEP); appendProgressBar(out, app.getProgress()).append(_SEP); appendLink(out, ui, rc.prefix(), - trackingUrl == null ? "#" : "http://", trackingUrl). + trackingUrl == null || trackingUrl.isEmpty() || "N/A".equalsIgnoreCase(trackingUrl) ? + "#" : "http://", trackingUrl). append(_SEP).append(escapeJavaScript(escapeHtml( app.getDiagnostics().toString()))). append("\"]"); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java index f36e181502..a27ba15c45 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java @@ -31,7 +31,6 @@ import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import static org.apache.hadoop.yarn.util.StringHelper.*; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; class CapacitySchedulerPage extends RmView { static final String _Q = ".ui-state-default.ui-corner-all"; @@ -96,6 +95,7 @@ static class QueuesBlock extends HtmlBlock { @Override public void render(Block html) { + html._(MetricsOverviewTable.class); UL>> ul = html. div("#cs-wrapper.ui-widget"). div(".ui-widget-header.ui-corner-top"). 
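For context, the web UI changes in this patch all follow the same composition pattern: a page-level block embeds MetricsOverviewTable first and then renders its page-specific blocks by class, letting the web framework's injector build them. Below is a minimal illustrative sketch of that pattern; it is not part of the patch and ExampleOverviewBlock is a hypothetical name.

package org.apache.hadoop.yarn.server.resourcemanager.webapp;

import org.apache.hadoop.yarn.webapp.view.HtmlBlock;

// Hypothetical block, shown only to illustrate how AppsBlockWithMetrics,
// QueuesBlock and NodesBlock embed the shared cluster metrics table.
class ExampleOverviewBlock extends HtmlBlock {
  @Override
  protected void render(Block html) {
    // Embedded blocks are instantiated by the web framework's injector.
    html._(MetricsOverviewTable.class);
    // Page-specific content follows the shared overview table.
    html.p()._("Page-specific content goes here")._();
  }
}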
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/DefaultSchedulerPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/DefaultSchedulerPage.java index a232e5bcc8..8db4caffd8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/DefaultSchedulerPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/DefaultSchedulerPage.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; import com.google.inject.Inject; -import com.google.inject.servlet.RequestScoped; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; @@ -35,7 +34,6 @@ import org.apache.hadoop.yarn.webapp.view.InfoBlock; import static org.apache.hadoop.yarn.util.StringHelper.*; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; class DefaultSchedulerPage extends RmView { static final String _Q = ".ui-state-default.ui-corner-all"; @@ -76,8 +74,9 @@ static class QueueInfoBlock extends HtmlBlock { int nodeContainers = 0; for (RMNode ni : this.rmContext.getRMNodes().values()) { - usedNodeMem += fs.getUsedResource(ni.getNodeID()).getMemory(); - availNodeMem += fs.getAvailableResource(ni.getNodeID()).getMemory(); + SchedulerNodeReport report = fs.getNodeReport(ni.getNodeID()); + usedNodeMem += report.getUsedResource().getMemory(); + availNodeMem += report.getAvailableResource().getMemory(); totNodeMem += ni.getTotalCapability().getMemory(); nodeContainers += fs.getNodeReport(ni.getNodeID()).getNumContainers(); } @@ -109,6 +108,7 @@ static class QueuesBlock extends HtmlBlock { @Override public void render(Block html) { + html._(MetricsOverviewTable.class); UL>> ul = html. div("#cs-wrapper.ui-widget"). div(".ui-widget-header.ui-corner-top"). diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java new file mode 100644 index 0000000000..7e0150ab9e --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java @@ -0,0 +1,164 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.resourcemanager.webapp; + +import java.util.concurrent.ConcurrentMap; + +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; +import org.apache.hadoop.yarn.webapp.view.HtmlBlock; + +import com.google.inject.Inject; + +/** + * Provides a table with an overview of many cluster-wide metrics and, if + * per-user metrics are enabled, an overview of what the + * current user is using on the cluster. + */ +public class MetricsOverviewTable extends HtmlBlock { + private static final long BYTES_IN_GB = 1024 * 1024 * 1024; + + private final RMContext rmContext; + private final ResourceManager rm; + + @Inject + MetricsOverviewTable(RMContext context, ResourceManager rm, ViewContext ctx) { + super(ctx); + this.rmContext = context; + this.rm = rm; + } + + + @Override + protected void render(Block html) { + //Yes this is a hack, but there is no other way to insert + //CSS in the correct spot + html.style(".metrics {margin-bottom:5px}"); + + ResourceScheduler rs = rm.getResourceScheduler(); + QueueMetrics metrics = rs.getRootQueueMetrics(); + + int appsSubmitted = metrics.getAppsSubmitted(); + int reservedGB = metrics.getReservedGB(); + int availableGB = metrics.getAvailableGB(); + int allocatedGB = metrics.getAllocatedGB(); + int containersAllocated = metrics.getAllocatedContainers(); + int totalGB = availableGB + reservedGB + allocatedGB; + + ConcurrentMap nodes = rmContext.getRMNodes(); + int totalNodes = nodes.size(); + int lostNodes = 0; + int unhealthyNodes = 0; + int decommissionedNodes = 0; + for(RMNode node: nodes.values()) { + if(node == null || node.getState() == null) { + lostNodes++; + continue; + } + switch(node.getState()) { + case DECOMMISSIONED: + decommissionedNodes++; + break; + case LOST: + lostNodes++; + break; + case UNHEALTHY: + unhealthyNodes++; + break; + //RUNNING noop + } + } + + DIV div = html.div().$class("metrics"); + + div.table("#metricsoverview"). + thead().$class("ui-widget-header"). + tr(). + th().$class("ui-state-default")._("Apps Submitted")._(). + th().$class("ui-state-default")._("Containers Running")._(). + th().$class("ui-state-default")._("Memory Used")._(). + th().$class("ui-state-default")._("Memory Total")._(). + th().$class("ui-state-default")._("Memory Reserved")._(). + th().$class("ui-state-default")._("Total Nodes")._(). + th().$class("ui-state-default")._("Decommissioned Nodes")._(). + th().$class("ui-state-default")._("Lost Nodes")._(). + th().$class("ui-state-default")._("Unhealthy Nodes")._(). + _(). + _(). + tbody().$class("ui-widget-content"). + tr().
+ td(String.valueOf(appsSubmitted)). + td(String.valueOf(containersAllocated)). + td(StringUtils.byteDesc(allocatedGB * BYTES_IN_GB)). + td(StringUtils.byteDesc(totalGB * BYTES_IN_GB)). + td(StringUtils.byteDesc(reservedGB * BYTES_IN_GB)). + td().a(url("nodes"),String.valueOf(totalNodes))._(). + td().a(url("nodes/DECOMMISSIONED"),String.valueOf(decommissionedNodes))._(). + td().a(url("nodes/LOST"),String.valueOf(lostNodes))._(). + td().a(url("nodes/UNHEALTHY"),String.valueOf(unhealthyNodes))._(). + _(). + _()._(); + + String user = request().getRemoteUser(); + if (user != null) { + QueueMetrics userMetrics = metrics.getUserMetrics(user); + if(userMetrics != null) { + int myAppsSubmitted = userMetrics.getAppsSubmitted(); + int myRunningContainers = userMetrics.getAllocatedContainers(); + int myPendingContainers = userMetrics.getPendingContainers(); + int myReservedContainers = userMetrics.getReservedContainers(); + int myReservedGB = userMetrics.getReservedGB(); + int myPendingGB = userMetrics.getPendingGB(); + int myAllocatedGB = userMetrics.getAllocatedGB(); + div.table("#usermetricsoverview"). + thead().$class("ui-widget-header"). + tr(). + th().$class("ui-state-default")._("Apps Submitted ("+user+")")._(). + th().$class("ui-state-default")._("Containers Running ("+user+")")._(). + th().$class("ui-state-default")._("Containers Pending ("+user+")")._(). + th().$class("ui-state-default")._("Containers Reserved ("+user+")")._(). + th().$class("ui-state-default")._("Memory Used ("+user+")")._(). + th().$class("ui-state-default")._("Memory Pending ("+user+")")._(). + th().$class("ui-state-default")._("Memory Reserved ("+user+")")._(). + _(). + _(). + tbody().$class("ui-widget-content"). + tr(). + td(String.valueOf(myAppsSubmitted)). + td(String.valueOf(myRunningContainers)). + td(String.valueOf(myPendingContainers)). + td(String.valueOf(myReservedContainers)). + td(StringUtils.byteDesc(myAllocatedGB * BYTES_IN_GB)). + td(StringUtils.byteDesc(myPendingGB * BYTES_IN_GB)). + td(StringUtils.byteDesc(myReservedGB * BYTES_IN_GB)). + _(). 
+ _()._(); + } + } + + div._(); + } +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index 1d074e3160..ea6f408228 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -18,14 +18,21 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWebApp.NODE_STATE; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES_ID; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.initID; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.tableInit; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeState; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; @@ -38,45 +45,75 @@ class NodesPage extends RmView { static class NodesBlock extends HtmlBlock { + private static final long BYTES_IN_MB = 1024 * 1024; final RMContext rmContext; + final ResourceManager rm; @Inject - NodesBlock(RMContext context, ViewContext ctx) { + NodesBlock(RMContext context, ResourceManager rm, ViewContext ctx) { super(ctx); this.rmContext = context; + this.rm = rm; } @Override protected void render(Block html) { + html._(MetricsOverviewTable.class); + + ResourceScheduler sched = rm.getResourceScheduler(); + String type = $(NODE_STATE); TBODY> tbody = html.table("#nodes"). thead(). tr(). th(".rack", "Rack"). + th(".state", "Node State"). th(".nodeaddress", "Node Address"). th(".nodehttpaddress", "Node HTTP Address"). th(".healthStatus", "Health-status"). th(".lastHealthUpdate", "Last health-update"). th(".healthReport", "Health-report"). th(".containers", "Containers"). -// th(".mem", "Mem Used (MB)"). -// th(".mem", "Mem Avail (MB)"). + th(".mem", "Mem Used"). + th(".mem", "Mem Avail"). _()._(). 
tbody(); + RMNodeState stateFilter = null; + if(type != null && !type.isEmpty()) { + stateFilter = RMNodeState.valueOf(type.toUpperCase()); + } for (RMNode ni : this.rmContext.getRMNodes().values()) { + if(stateFilter != null) { + RMNodeState state = ni.getState(); + if(!stateFilter.equals(state)) { + continue; + } + } + NodeId id = ni.getNodeID(); + SchedulerNodeReport report = sched.getNodeReport(id); + int numContainers = 0; + int usedMemory = 0; + int availableMemory = 0; + if(report != null) { + numContainers = report.getNumContainers(); + usedMemory = report.getUsedResource().getMemory(); + availableMemory = report.getAvailableResource().getMemory(); + } + NodeHealthStatus health = ni.getNodeHealthStatus(); tbody.tr(). td(ni.getRackName()). + td(String.valueOf(ni.getState())). td(String.valueOf(ni.getNodeID().toString())). td().a("http://" + ni.getHttpAddress(), ni.getHttpAddress())._(). td(health.getIsNodeHealthy() ? "Healthy" : "Unhealthy"). td(Times.format(health.getLastHealthReportTime())). td(String.valueOf(health.getHealthReport())). - // TODO: acm: refactor2 FIXME - //td(String.valueOf(ni.getNumContainers())). - // TODO: FIXME Vinodkv -// td(String.valueOf(ni.getUsedResource().getMemory())). -// td(String.valueOf(ni.getAvailableResource().getMemory())). - td("n/a")._(); + td(String.valueOf(numContainers)). + td().br().$title(String.valueOf(usedMemory))._(). + _(StringUtils.byteDesc(usedMemory * BYTES_IN_MB))._(). + td().br().$title(String.valueOf(availableMemory))._(). + _(StringUtils.byteDesc(availableMemory * BYTES_IN_MB))._(). + _(); } tbody._()._(); } @@ -84,7 +121,12 @@ protected void render(Block html) { @Override protected void preHead(Page.HTML<_> html) { commonPreHead(html); - setTitle("Nodes of the cluster"); + String type = $(NODE_STATE); + String title = "Nodes of the cluster"; + if(type != null && !type.isEmpty()) { + title = title+" ("+type+")"; + } + setTitle(title); set(DATATABLES_ID, "nodes"); set(initID(DATATABLES, "nodes"), nodesTableInit()); setTableStyles(html, "nodes", ".healthStatus {width:10em}",
- toString(); + StringBuilder b = tableInit().append(",aoColumnDefs:["); + b.append("{'bSearchable':false, 'aTargets': [7]} ,"); + b.append("{'sType':'title-numeric', 'bSearchable':false, " + + "'aTargets': [ 8, 9] }]}"); + return b.toString(); } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java index 19a43d0f12..50f6c4e99a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java @@ -30,6 +30,7 @@ public class RMWebApp extends WebApp { static final String APP_ID = "app.id"; static final String QUEUE_NAME = "queue.name"; + static final String NODE_STATE = "node.state"; private final ResourceManager rm; @@ -44,9 +45,9 @@ public void setup() { bind(RMContext.class).toInstance(rm.getRMContext()); } route("/", RmController.class); - route("/nodes", RmController.class, "nodes"); + route(pajoin("/nodes", NODE_STATE), RmController.class, "nodes"); route("/apps", RmController.class); - route("/cluster", RmController.class, "info"); + route("/cluster", RmController.class, "about"); route(pajoin("/app", APP_ID), RmController.class, "app"); route("/scheduler", RmController.class, "scheduler"); route(pajoin("/queue", QUEUE_NAME), RmController.class, "queue"); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java index 698bc3c933..2da95158c1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java @@ -22,8 +22,9 @@ import static org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWebApp.QUEUE_NAME; import static org.apache.hadoop.yarn.util.StringHelper.join; +import javax.servlet.http.HttpServletResponse; + import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -35,7 +36,6 @@ import org.apache.hadoop.yarn.util.Apps; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.Times; -import org.apache.hadoop.yarn.util.YarnVersionInfo; import org.apache.hadoop.yarn.webapp.Controller; import org.apache.hadoop.yarn.webapp.ResponseInfo; @@ -50,25 +50,15 @@ public class RmController extends Controller { setTitle("Applications"); } - public void info() { + public void about() { setTitle("About the Cluster"); - long ts = ResourceManager.clusterTimeStamp; - 
ResourceManager rm = getInstance(ResourceManager.class); - info("Cluster overview"). - _("Cluster ID:", ts). - _("ResourceManager state:", rm.getServiceState()). - _("ResourceManager started on:", Times.format(ts)). - _("ResourceManager version:", YarnVersionInfo.getBuildVersion() + - " on " + YarnVersionInfo.getDate()). - _("Hadoop version:", VersionInfo.getBuildVersion() + - " on " + VersionInfo.getDate()); - render(InfoPage.class); + render(AboutPage.class); } public void app() { String aid = $(APP_ID); if (aid.isEmpty()) { - setStatus(response().SC_BAD_REQUEST); + setStatus(HttpServletResponse.SC_BAD_REQUEST); setTitle("Bad request: requires application ID"); return; } @@ -77,7 +67,7 @@ public void app() { RMApp app = context.getRMApps().get(appID); if (app == null) { // TODO: handle redirect to jobhistory server - setStatus(response().SC_NOT_FOUND); + setStatus(HttpServletResponse.SC_NOT_FOUND); setTitle("Application not found: "+ aid); return; } @@ -89,9 +79,8 @@ public void app() { ResponseInfo info = info("Application Overview"). _("User:", app.getUser()). _("Name:", app.getName()). - _("State:", (app.getState() == RMAppState.FINISHED ? - app.getAMFinalState() : app.getState().toString()) - ). + _("State:", app.getState().toString()). + _("FinalStatus:", app.getFinalApplicationStatus().toString()). _("Started:", Times.format(app.getStartTime())). _("Elapsed:", StringUtils.formatTime( Times.elapsed(app.getStartTime(), app.getFinishTime()))). @@ -108,7 +97,7 @@ public void app() { } else { info._("AM container logs:", "AM not yet registered with RM"); } - render(InfoPage.class); + render(AboutPage.class); } public void nodes() { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java index a5337b8175..7c175d1317 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java @@ -52,7 +52,7 @@ protected Class nav() { @Override protected Class content() { - return AppsBlock.class; + return AppsBlockWithMetrics.class; } private String appsTableInit() { @@ -60,7 +60,7 @@ private String appsTableInit() { // id, user, name, queue, state, progress, ui, note StringBuilder init = tableInit(). append(", aoColumns:[{sType:'title-numeric'}, null, null, null, null,"). - append("{sType:'title-numeric', bSearchable:false}, null, null]"); + append("null,{sType:'title-numeric', bSearchable:false}, null, null]"); String rows = $("rowlimit"); int rowLimit = rows.isEmpty() ? 
MAX_DISPLAY_ROWS : Integer.parseInt(rows); if (list.apps.size() < rowLimit) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/resources/capacity-scheduler.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/resources/capacity-scheduler.xml index 6db99098ec..968ad90160 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/resources/capacity-scheduler.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/resources/capacity-scheduler.xml @@ -3,76 +3,92 @@ yarn.scheduler.capacity.maximum-applications 10000 - Maximum number of applications that can be running. + + Maximum number of applications that can be pending and running. yarn.scheduler.capacity.maximum-am-resource-percent 0.1 + + Maximum percent of resources in the cluster which can be used to run + application masters i.e. controls number of concurrent running + applications. + yarn.scheduler.capacity.root.queues default - The queues at the this level (root is the root queue). + + The queues at this level (root is the root queue). yarn.scheduler.capacity.root.capacity 100 - The total capacity as a percentage out of 100 for this queue. - If it has child queues then this includes their capacity as well. - The child queues capacity should add up to their parent queue's capacity - or less. - - - - yarn.scheduler.capacity.root.acl_administer_queues - * - The ACL for who can administer this queue. i.e. - change sub queue allocations. + + The total capacity as a percentage out of 100 for this queue. + If it has child queues then this includes their capacity as well. + The child queues capacity should add up to their parent queue's capacity + or less. + yarn.scheduler.capacity.root.default.capacity 100 - default queue target capacity. + Default queue target capacity. yarn.scheduler.capacity.root.default.user-limit-factor 1 - default queue user limit a percantage from 0.0 to 1.0. + + Default queue user limit a percentage from 0.0 to 1.0. yarn.scheduler.capacity.root.default.maximum-capacity -1 - the maximum capacity of the default queue -1 disables. + + The maximum capacity of the default queue. A value of -1 disables this. yarn.scheduler.capacity.root.default.state RUNNING - The state of the default queue. can be RUNNING or STOPPED + + The state of the default queue. State can be one of RUNNING or STOPPED. yarn.scheduler.capacity.root.default.acl_submit_jobs * - The ACL of who can submit jobs to the default queue. + + The ACL of who can submit jobs to the default queue. yarn.scheduler.capacity.root.default.acl_administer_jobs * - The ACL of who can administer jobs on the default queue. + + The ACL of who can administer jobs on the default queue. + + + + + yarn.scheduler.capacity.root.acl_administer_queues + * + + The ACL for who can administer this queue i.e. change sub-queue + allocations.
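For readers unfamiliar with the capacity-scheduler.xml keys documented above, here is a minimal sketch of reading them through the plain Configuration API. It is illustrative only and not part of the patch; it assumes the file is on the classpath, and the defaults simply mirror the shipped values.

import org.apache.hadoop.conf.Configuration;

public class CapacitySchedulerConfigExample {
  public static void main(String[] args) {
    // Load only capacity-scheduler.xml, without the usual default resources.
    Configuration conf = new Configuration(false);
    conf.addResource("capacity-scheduler.xml");

    int maxApps = conf.getInt("yarn.scheduler.capacity.maximum-applications", 10000);
    float maxAmShare =
        conf.getFloat("yarn.scheduler.capacity.maximum-am-resource-percent", 0.1f);
    String rootQueues = conf.get("yarn.scheduler.capacity.root.queues", "default");
    int defaultCapacity =
        conf.getInt("yarn.scheduler.capacity.root.default.capacity", 100);

    System.out.println("queues under root: " + rootQueues
        + ", default queue capacity: " + defaultCapacity + "%"
        + ", max applications: " + maxApps
        + ", max AM resource share: " + maxAmShare);
  }
}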
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java index 423b519fec..ee6cc99f0b 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java @@ -164,9 +164,10 @@ public synchronized void addTask(Task task) { if (requests == null) { requests = new HashMap(); this.requests.put(priority, requests); - LOG.info("DEBUG --- Added" + - " priority=" + priority + - " application=" + applicationId); + if(LOG.isDebugEnabled()) { + LOG.debug("Added priority=" + priority + " application=" + + applicationId); + } } final Resource capability = requestSpec.get(priority); @@ -182,9 +183,10 @@ public synchronized void addTask(Task task) { LOG.info("Added task " + task.getTaskId() + " to application " + applicationId + " at priority " + priority); - LOG.info("DEBUG --- addTask:" + - " application=" + applicationId + - " #asks=" + ask.size()); + if(LOG.isDebugEnabled()) { + LOG.debug("addTask: application=" + applicationId + + " #asks=" + ask.size()); + } // Create resource requests for (String host : task.getHosts()) { @@ -245,23 +247,24 @@ private synchronized void addResourceRequest( org.apache.hadoop.yarn.util.BuilderUtils.newResourceRequest( request)); // clone to ensure the RM doesn't manipulate the same obj - LOG.info("DEBUG --- addResourceRequest:" + - " applicationId=" + applicationId.getId() + - " priority=" + priority.getPriority() + - " resourceName=" + resourceName + - " capability=" + capability + - " numContainers=" + request.getNumContainers() + - " #asks=" + ask.size()); + if(LOG.isDebugEnabled()) { + LOG.debug("addResourceRequest: applicationId=" + applicationId.getId() + + " priority=" + priority.getPriority() + + " resourceName=" + resourceName + " capability=" + capability + + " numContainers=" + request.getNumContainers() + + " #asks=" + ask.size()); + } } public synchronized List getResources() throws IOException { - LOG.info("DEBUG --- getResources begin:" + - " application=" + applicationId + - " #ask=" + ask.size()); - for (ResourceRequest request : ask) { - LOG.info("DEBUG --- getResources:" + - " application=" + applicationId + - " ask-request=" + request); + if(LOG.isDebugEnabled()) { + LOG.debug("getResources begin:" + " application=" + applicationId + + " #ask=" + ask.size()); + + for (ResourceRequest request : ask) { + LOG.debug("getResources:" + " application=" + applicationId + + " ask-request=" + request); + } } // Get resources from the ResourceManager @@ -280,9 +283,10 @@ public synchronized List getResources() throws IOException { // Clear state for next interaction with ResourceManager ask.clear(); - LOG.info("DEBUG --- getResources() for " + applicationId + ":" + - " ask=" + ask.size() + - " recieved=" + containers.size()); + if(LOG.isDebugEnabled()) { + LOG.debug("getResources() for " + applicationId + ":" + + " ask=" + ask.size() + " recieved=" + containers.size()); + } return containers; } @@ -353,22 +357,22 @@ private void updateResourceRequests(Map requests, NodeType type, Task task) 
{ if (type == NodeType.NODE_LOCAL) { for (String host : task.getHosts()) { - LOG.info("DEBUG --- updateResourceRequests:" + - " application=" + applicationId + - " type=" + type + - " host=" + host + - " request=" + ((requests == null) ? "null" : requests.get(host))); + if(LOG.isDebugEnabled()) { + LOG.debug("updateResourceRequests:" + " application=" + applicationId + + " type=" + type + " host=" + host + + " request=" + ((requests == null) ? "null" : requests.get(host))); + } updateResourceRequest(requests.get(host)); } } if (type == NodeType.NODE_LOCAL || type == NodeType.RACK_LOCAL) { for (String rack : task.getRacks()) { - LOG.info("DEBUG --- updateResourceRequests:" + - " application=" + applicationId + - " type=" + type + - " rack=" + rack + - " request=" + ((requests == null) ? "null" : requests.get(rack))); + if(LOG.isDebugEnabled()) { + LOG.debug("updateResourceRequests:" + " application=" + applicationId + + " type=" + type + " rack=" + rack + + " request=" + ((requests == null) ? "null" : requests.get(rack))); + } updateResourceRequest(requests.get(rack)); } } @@ -378,9 +382,10 @@ private void updateResourceRequests(Map requests, org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode.ANY) ); - LOG.info("DEBUG --- updateResourceRequests:" + - " application=" + applicationId + - " #asks=" + ask.size()); + if(LOG.isDebugEnabled()) { + LOG.debug("updateResourceRequests:" + " application=" + applicationId + + " #asks=" + ask.size()); + } } private void updateResourceRequest(ResourceRequest request) { @@ -392,9 +397,10 @@ private void updateResourceRequest(ResourceRequest request) { org.apache.hadoop.yarn.util.BuilderUtils.newResourceRequest( request)); // clone to ensure the RM doesn't manipulate the same obj - LOG.info("DEBUG --- updateResourceRequest:" + - " application=" + applicationId + - " request=" + request); + if(LOG.isDebugEnabled()) { + LOG.debug("updateResourceRequest:" + " application=" + applicationId + + " request=" + request); + } } private ContainerLaunchContext createCLC(Container container) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java index 727cd1a232..7abd7f8865 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java @@ -30,7 +30,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.records.AMResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ -47,11 +47,11 @@ public class MockAM { private final ApplicationAttemptId attemptId; private final RMContext context; private final AMRMProtocol amRMProtocol; - + private final List requests = new ArrayList(); private final List releases = new ArrayList(); - MockAM(RMContext 
context, AMRMProtocol amRMProtocol, + MockAM(RMContext context, AMRMProtocol amRMProtocol, ApplicationAttemptId attemptId) { this.context = context; this.amRMProtocol = amRMProtocol; @@ -85,7 +85,7 @@ public void registerAppAttempt() throws Exception { amRMProtocol.registerApplicationMaster(req); } - public void addRequests(String[] hosts, int memory, int priority, + public void addRequests(String[] hosts, int memory, int priority, int containers) throws Exception { requests.addAll(createReq(hosts, memory, priority, containers)); } @@ -97,33 +97,33 @@ public AMResponse schedule() throws Exception { return response; } - public AMResponse allocate( - String host, int memory, int numContainers, + public AMResponse allocate( + String host, int memory, int numContainers, List releases) throws Exception { - List reqs = createReq(new String[]{host}, memory, 1, numContainers); + List reqs = createReq(new String[]{host}, memory, 1, numContainers); return allocate(reqs, releases); } - public List createReq(String[] hosts, int memory, int priority, + public List createReq(String[] hosts, int memory, int priority, int containers) throws Exception { List reqs = new ArrayList(); for (String host : hosts) { - ResourceRequest hostReq = createResourceReq(host, memory, priority, + ResourceRequest hostReq = createResourceReq(host, memory, priority, containers); reqs.add(hostReq); - ResourceRequest rackReq = createResourceReq("default-rack", memory, + ResourceRequest rackReq = createResourceReq("default-rack", memory, priority, containers); reqs.add(rackReq); } - - ResourceRequest offRackReq = createResourceReq("*", memory, priority, + + ResourceRequest offRackReq = createResourceReq("*", memory, priority, containers); reqs.add(offRackReq); return reqs; - + } - public ResourceRequest createResourceReq(String resource, int memory, int priority, + public ResourceRequest createResourceReq(String resource, int memory, int priority, int containers) throws Exception { ResourceRequest req = Records.newRecord(ResourceRequest.class); req.setHostName(resource); @@ -138,7 +138,7 @@ public ResourceRequest createResourceReq(String resource, int memory, int priori } public AMResponse allocate( - List resourceRequest, List releases) + List resourceRequest, List releases) throws Exception { AllocateRequest req = BuilderUtils.newAllocateRequest(attemptId, ++responseId, 0F, resourceRequest, releases); @@ -151,7 +151,7 @@ public void unregisterAppAttempt() throws Exception { FinishApplicationMasterRequest req = Records.newRecord(FinishApplicationMasterRequest.class); req.setAppAttemptId(attemptId); req.setDiagnostics(""); - req.setFinalState(""); + req.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED); req.setTrackingUrl(""); amRMProtocol.finishApplicationMaster(req); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java index 72ade5c1da..254ea0ad5a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java @@ -194,12 +194,11 @@ 
synchronized public StartContainerResponse startContainer( Resources.subtractFrom(available, containerLaunchContext.getResource()); Resources.addTo(used, containerLaunchContext.getResource()); - LOG.info("DEBUG --- startContainer:" + - " node=" + containerManagerAddress + - " application=" + applicationId + - " container=" + container + - " available=" + available + - " used=" + used); + if(LOG.isDebugEnabled()) { + LOG.debug("startContainer:" + " node=" + containerManagerAddress + + " application=" + applicationId + " container=" + container + + " available=" + available + " used=" + used); + } StartContainerResponse response = recordFactory.newRecordInstance(StartContainerResponse.class); return response; @@ -254,12 +253,11 @@ synchronized public StopContainerResponse stopContainer(StopContainerRequest req Resources.addTo(available, container.getResource()); Resources.subtractFrom(used, container.getResource()); - LOG.info("DEBUG --- stopContainer:" + - " node=" + containerManagerAddress + - " application=" + applicationId + - " container=" + containerID + - " available=" + available + - " used=" + used); + if(LOG.isDebugEnabled()) { + LOG.debug("stopContainer:" + " node=" + containerManagerAddress + + " application=" + applicationId + " container=" + containerID + + " available=" + available + " used=" + used); + } StopContainerResponse response = recordFactory.newRecordInstance(StopContainerResponse.class); return response; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java index 605a0f363b..349a2cacbe 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java @@ -28,10 +28,12 @@ import org.apache.hadoop.yarn.api.records.AMResponse; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store; import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.log4j.Level; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -71,8 +73,9 @@ public void test() throws Exception { RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); am1.registerAppAttempt(); - Assert.assertEquals(2 * GB, rm.getResourceScheduler().getUsedResource( - nm1.getNodeId()).getMemory()); + SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport( + nm1.getNodeId()); + Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemory()); RMApp app2 = rm.submitApp(2048); // kick the scheduling, 2GB given to AM, remaining 2 GB on nm2 @@ -80,8 +83,9 @@ public void test() throws Exception { 
RMAppAttempt attempt2 = app2.getCurrentAppAttempt(); MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId()); am2.registerAppAttempt(); - Assert.assertEquals(2 * GB, rm.getResourceScheduler().getUsedResource( - nm2.getNodeId()).getMemory()); + SchedulerNodeReport report_nm2 = rm.getResourceScheduler().getNodeReport( + nm2.getNodeId()); + Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemory()); // add request for containers am1.addRequests(new String[] { "h1", "h2" }, GB, 1, 1); @@ -114,16 +118,14 @@ public void test() throws Exception { Assert.assertEquals(1, allocated2.size()); Assert.assertEquals(3 * GB, allocated2.get(0).getResource().getMemory()); Assert.assertEquals(nm1.getNodeId(), allocated2.get(0).getNodeId()); + + report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId()); + report_nm2 = rm.getResourceScheduler().getNodeReport(nm2.getNodeId()); + Assert.assertEquals(0, report_nm1.getAvailableResource().getMemory()); + Assert.assertEquals(2 * GB, report_nm2.getAvailableResource().getMemory()); - Assert.assertEquals(0, rm.getResourceScheduler().getAvailableResource( - nm1.getNodeId()).getMemory()); - Assert.assertEquals(2 * GB, rm.getResourceScheduler() - .getAvailableResource(nm2.getNodeId()).getMemory()); - - Assert.assertEquals(6 * GB, rm.getResourceScheduler().getUsedResource( - nm1.getNodeId()).getMemory()); - Assert.assertEquals(2 * GB, rm.getResourceScheduler().getUsedResource( - nm2.getNodeId()).getMemory()); + Assert.assertEquals(6 * GB, report_nm1.getUsedResource().getMemory()); + Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemory()); Container c1 = allocated1.get(0); Assert.assertEquals(GB, c1.getResource().getMemory()); @@ -138,8 +140,8 @@ public void test() throws Exception { } Assert.assertEquals(1, attempt1.getJustFinishedContainers().size()); Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size()); - Assert.assertEquals(5 * GB, rm.getResourceScheduler().getUsedResource( - nm1.getNodeId()).getMemory()); + report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId()); + Assert.assertEquals(5 * GB, report_nm1.getUsedResource().getMemory()); rm.stop(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java index 01dd078b77..bda4d46e4e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java @@ -22,10 +22,11 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationMaster; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import 
org.apache.hadoop.yarn.api.records.ApplicationStatus; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -69,7 +70,7 @@ public ApplicationStatus getStatus() { } @Override - public ApplicationState getState() { + public YarnApplicationState getState() { throw new UnsupportedOperationException("Not supported yet."); } @@ -119,7 +120,7 @@ public void setStatus(ApplicationStatus status) { } @Override - public void setState(ApplicationState state) { + public void setState(YarnApplicationState state) { throw new UnsupportedOperationException("Not supported yet."); } @@ -207,11 +208,11 @@ public ApplicationReport createAndGetApplicationReport() { } @Override public void handle(RMAppEvent event) { - throw new UnsupportedOperationException("Not supported yet."); + throw new UnsupportedOperationException("Not supported yet."); } @Override - public String getAMFinalState() { + public FinalApplicationStatus getFinalApplicationStatus() { throw new UnsupportedOperationException("Not supported yet."); } } @@ -274,9 +275,14 @@ public StringBuilder getDiagnostics() { public float getProgress() { return (float)Math.random(); } + @Override + public FinalApplicationStatus getFinalApplicationStatus() { + return FinalApplicationStatus.UNDEFINED; + } + }; } - + public static List newApplications(int n) { List list = Lists.newArrayList(); for (int i = 0; i < n; ++i) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java index 85d8432535..2ee54311f9 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java @@ -32,7 +32,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationMaster; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Priority; @@ -65,7 +65,7 @@ public class TestAMLaunchFailure { // private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); // ApplicationsManagerImpl asmImpl; // YarnScheduler scheduler = new DummyYarnScheduler(); -// ApplicationTokenSecretManager applicationTokenSecretManager = +// ApplicationTokenSecretManager applicationTokenSecretManager = // new ApplicationTokenSecretManager(); // private ClientRMService clientService; // @@ -98,7 +98,7 @@ public class TestAMLaunchFailure { // , ApplicationStore appStore) // throws IOException { // // TODO Auto-generated method stub -// +// // } // // @Override @@ -199,7 +199,7 @@ public class TestAMLaunchFailure { // conf.setLong(YarnConfiguration.AM_EXPIRY_INTERVAL, 3000L); // 
conf.setInt(RMConfig.AM_MAX_RETRIES, 1); // asmImpl.init(conf); -// asmImpl.start(); +// asmImpl.start(); // } // // @After @@ -221,7 +221,7 @@ public class TestAMLaunchFailure { // .newRecordInstance(SubmitApplicationRequest.class); // request.setApplicationSubmissionContext(submissionContext); // clientService.submitApplication(request); -// AppAttempt application = context.getApplications().get(appID); +// AppAttempt application = context.getApplications().get(appID); // // while (application.getState() != ApplicationState.FAILED) { // LOG.info("Waiting for application to go to FAILED state." diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java index 61e3821857..e9062bc3dd 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java @@ -33,7 +33,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationMaster; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -75,7 +75,7 @@ public class TestAMRestart { // private static final Log LOG = LogFactory.getLog(TestAMRestart.class); // ApplicationsManagerImpl appImpl; // RMContext asmContext = new RMContextImpl(new MemStore()); -// ApplicationTokenSecretManager appTokenSecretManager = +// ApplicationTokenSecretManager appTokenSecretManager = // new ApplicationTokenSecretManager(); // DummyResourceScheduler scheduler; // private ClientRMService clientRMService; @@ -90,7 +90,7 @@ public class TestAMRestart { // int launcherLaunchCalled = 0; // int launcherCleanupCalled = 0; // private final static RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); -// +// // private class ExtApplicationsManagerImpl extends ApplicationsManagerImpl { // public ExtApplicationsManagerImpl( // ApplicationTokenSecretManager applicationTokenSecretManager, @@ -115,7 +115,7 @@ public class TestAMRestart { // LOG.info("DEBUG -- waiting for launch"); // synchronized(launchNotify) { // while (launchNotify.get() == 0) { -// try { +// try { // launchNotify.wait(); // } catch (InterruptedException e) { // } @@ -151,11 +151,11 @@ public class TestAMRestart { // } // // private class DummyResourceScheduler implements ResourceScheduler { -// +// // @Override // public void removeNode(RMNode node) { // } -// +// // @Override // public Allocation allocate(ApplicationId applicationId, // List ask, List release) throws IOException { @@ -222,7 +222,7 @@ public class TestAMRestart { // // @Override // public void nodeUpdate(RMNode nodeInfo, -// Map> containers) { +// Map> containers) { // } // // @Override @@ 
-253,7 +253,7 @@ public class TestAMRestart { // asmContext.getDispatcher().start(); // asmContext.getDispatcher().register(ApplicationTrackerEventType.class, scheduler); // appImpl = new ExtApplicationsManagerImpl(appTokenSecretManager, scheduler, asmContext); -// +// // conf.setLong(YarnConfiguration.AM_EXPIRY_INTERVAL, 1000L); // conf.setInt(RMConfig.AM_MAX_RETRIES, maxFailures); // appImpl.init(conf); @@ -261,7 +261,7 @@ public class TestAMRestart { // // this.clientRMService = new ClientRMService(asmContext, appImpl // .getAmLivelinessMonitor(), appImpl.getClientToAMSecretManager(), -// scheduler); +// scheduler); // this.clientRMService.init(conf); // } // @@ -269,7 +269,7 @@ public class TestAMRestart { // public void tearDown() { // } // -// private void waitForFailed(AppAttempt application, ApplicationState +// private void waitForFailed(AppAttempt application, ApplicationState // finalState) throws Exception { // int count = 0; // while(application.getState() != finalState && count < 10) { @@ -292,7 +292,7 @@ public class TestAMRestart { // .newRecordInstance(SubmitApplicationRequest.class); // request.setApplicationSubmissionContext(subContext); // clientRMService.submitApplication(request); -// AppAttempt application = asmContext.getApplications().get(appID); +// AppAttempt application = asmContext.getApplications().get(appID); // synchronized (schedulerNotify) { // while(schedulerNotify.get() == 0) { // schedulerNotify.wait(); @@ -306,4 +306,4 @@ public class TestAMRestart { // waitForFailed(application, ApplicationState.FAILED); // stop = true; // } -} \ No newline at end of file +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java index 18975084a2..219a28fe19 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java @@ -26,7 +26,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.event.EventHandler; @@ -152,7 +152,7 @@ public class TestASMStateMachine { // } // } // -// private void waitForState( ApplicationState +// private void waitForState( ApplicationState // finalState, AppAttemptImpl masterInfo) throws Exception { // int count = 0; // while(masterInfo.getState() != finalState && count < 10) { @@ -160,10 +160,10 @@ public class TestASMStateMachine { // count++; // } // Assert.assertEquals(finalState, masterInfo.getState()); -// } -// -// /* Test the state machine. -// * +// } +// +// /* Test the state machine. 
+// * // */ // @Test // public void testStateMachine() throws Exception { @@ -211,6 +211,6 @@ public class TestASMStateMachine { // /* check if expiry doesnt make it failed */ // handler.handle(new ApplicationEvent(ApplicationEventType.EXPIRE, // masterInfo.getApplicationID())); -// Assert.assertEquals(ApplicationState.COMPLETED, masterInfo.getState()); +// Assert.assertEquals(ApplicationState.COMPLETED, masterInfo.getState()); // } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java index ff4e798819..00ab9545a0 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java @@ -26,7 +26,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; @@ -44,21 +44,21 @@ /** * A test case that tests the expiry of the application master. - * More tests can be added to this. + * More tests can be added to this. 
*/ public class TestApplicationMasterExpiry { // private static final Log LOG = LogFactory.getLog(TestApplicationMasterExpiry.class); // private static RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); -// +// // private final RMContext context = new RMContextImpl(new MemStore()); // private AMLivelinessMonitor amLivelinessMonitor; -// +// // @Before // public void setUp() { // new DummyApplicationTracker(); // new DummySN(); // new DummyLauncher(); -// new ApplicationEventTypeListener(); +// new ApplicationEventTypeListener(); // Configuration conf = new Configuration(); // context.getDispatcher().register(ApplicationEventType.class, // new ResourceManager.ApplicationEventDispatcher(context)); @@ -70,7 +70,7 @@ public class TestApplicationMasterExpiry { // amLivelinessMonitor.init(conf); // amLivelinessMonitor.start(); // } -// +// // private class DummyApplicationTracker implements EventHandler> { // DummyApplicationTracker() { // context.getDispatcher().register(ApplicationTrackerEventType.class, this); @@ -79,10 +79,10 @@ public class TestApplicationMasterExpiry { // public void handle(ASMEvent event) { // } // } -// +// // private AtomicInteger expiry = new AtomicInteger(); // private boolean expired = false; -// +// // private class ApplicationEventTypeListener implements // EventHandler { // ApplicationEventTypeListener() { @@ -100,7 +100,7 @@ public class TestApplicationMasterExpiry { // } // } // } -// +// // private class DummySN implements EventHandler> { // DummySN() { // context.getDispatcher().register(SNEventType.class, this); @@ -109,7 +109,7 @@ public class TestApplicationMasterExpiry { // public void handle(ASMEvent event) { // } // } -// +// // private class DummyLauncher implements EventHandler> { // DummyLauncher() { // context.getDispatcher().register(AMLauncherEventType.class, this); @@ -118,8 +118,8 @@ public class TestApplicationMasterExpiry { // public void handle(ASMEvent event) { // } // } -// -// private void waitForState(AppAttempt application, ApplicationState +// +// private void waitForState(AppAttempt application, ApplicationState // finalState) throws Exception { // int count = 0; // while(application.getState() != finalState && count < 10) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java index 459db70fbd..3780617eda 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java @@ -28,7 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationMaster; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import 
org.apache.hadoop.yarn.api.records.ContainerId; @@ -62,7 +62,7 @@ public class TestSchedulerNegotiator { // private SchedulerNegotiator schedulerNegotiator; // private DummyScheduler scheduler; // private final int testNum = 99999; -// +// // private final RMContext context = new RMContextImpl(new MemStore()); // AppAttemptImpl masterInfo; // private EventHandler handler; @@ -79,13 +79,13 @@ public class TestSchedulerNegotiator { // containers.add(container); // return new Allocation(containers, Resources.none()); // } -// -// +// +// // @Override // public void nodeUpdate(RMNode nodeInfo, // Map> containers) { // } -// +// // @Override // public void removeNode(RMNode node) { // } @@ -142,7 +142,7 @@ public class TestSchedulerNegotiator { // return null; // } // } -// +// // @Before // public void setUp() { // scheduler = new DummyScheduler(); @@ -153,12 +153,12 @@ public class TestSchedulerNegotiator { // context.getDispatcher().init(conf); // context.getDispatcher().start(); // } -// +// // @After // public void tearDown() { // schedulerNegotiator.stop(); // } -// +// // public void waitForState(ApplicationState state, AppAttemptImpl info) { // int count = 0; // while (info.getState() != state && count < 100) { @@ -184,7 +184,7 @@ public class TestSchedulerNegotiator { // submissionContext.setApplicationId(recordFactory.newRecordInstance(ApplicationId.class)); // submissionContext.getApplicationId().setClusterTimestamp(System.currentTimeMillis()); // submissionContext.getApplicationId().setId(1); -// +// // masterInfo = new AppAttemptImpl(this.context, this.conf, "dummy", // submissionContext, "dummyClientToken", StoreFactory // .createVoidAppStore(), new AMLivelinessMonitor(context diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java index 03229c34b4..81e10092c8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.MockApps; @@ -167,8 +168,8 @@ public void handle(RMAppEvent event) { } @Override - public String getAMFinalState() { - return "UNKNOWN"; + public FinalApplicationStatus getFinalApplicationStatus() { + return FinalApplicationStatus.UNDEFINED; }; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 24408821e2..a46673f36d 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -31,6 +31,7 @@ import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.EventHandler; @@ -192,10 +193,15 @@ private static void assertStartTimeSet(RMApp application) { } private static void assertAppState(RMAppState state, RMApp application) { - Assert.assertEquals("application state should have been" + state, + Assert.assertEquals("application state should have been " + state, state, application.getState()); } + private static void assertFinalAppStatus(FinalApplicationStatus status, RMApp application) { + Assert.assertEquals("Final application status should have been " + status, + status, application.getFinalApplicationStatus()); + } + // test to make sure times are set when app finishes private static void assertTimesAtFinish(RMApp application) { assertStartTimeSet(application); @@ -208,6 +214,7 @@ private static void assertTimesAtFinish(RMApp application) { private static void assertKilled(RMApp application) { assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); + assertFinalAppStatus(FinalApplicationStatus.KILLED, application); StringBuilder diag = application.getDiagnostics(); Assert.assertEquals("application diagnostics is not correct", "Application killed by user.", diag.toString()); @@ -224,6 +231,7 @@ private static void assertAppAndAttemptKilled(RMApp application) { private static void assertFailed(RMApp application, String regex) { assertTimesAtFinish(application); assertAppState(RMAppState.FAILED, application); + assertFinalAppStatus(FinalApplicationStatus.FAILED, application); StringBuilder diag = application.getDiagnostics(); Assert.assertTrue("application diagnostics is not correct", diag.toString().matches(regex)); @@ -261,6 +269,7 @@ protected RMApp testCreateAppRunning() throws IOException { application.handle(event); assertStartTimeSet(application); assertAppState(RMAppState.RUNNING, application); + assertFinalAppStatus(FinalApplicationStatus.UNDEFINED, application); return application; } @@ -273,6 +282,8 @@ protected RMApp testCreateAppFinished() throws IOException { application.handle(event); assertAppState(RMAppState.FINISHED, application); assertTimesAtFinish(application); + // finished without a proper unregister implies failed + assertFinalAppStatus(FinalApplicationStatus.FAILED, application); return application; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 03a4ba0744..7083197ad1 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.EventHandler; @@ -52,7 +53,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; @@ -199,6 +202,7 @@ private void testAppAttemptNewState() { assertNull(applicationAttempt.getMasterContainer()); assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); assertEquals(0, applicationAttempt.getRanNodes().size()); + assertNull(applicationAttempt.getFinalApplicationStatus()); } /** @@ -212,6 +216,7 @@ private void testAppAttemptSubmittedState() { assertNull(applicationAttempt.getMasterContainer()); assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); assertEquals(0, applicationAttempt.getRanNodes().size()); + assertNull(applicationAttempt.getFinalApplicationStatus()); // Check events verify(masterService). 
@@ -230,6 +235,7 @@ private void testAppAttemptSubmittedToFailedState(String diagnostics) { assertNull(applicationAttempt.getMasterContainer()); assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); assertEquals(0, applicationAttempt.getRanNodes().size()); + assertNull(applicationAttempt.getFinalApplicationStatus()); // Check events verify(application).handle(any(RMAppRejectedEvent.class)); @@ -247,6 +253,7 @@ private void testAppAttemptKilledState(Container amContainer, assertEquals(amContainer, applicationAttempt.getMasterContainer()); assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); assertEquals(0, applicationAttempt.getRanNodes().size()); + assertNull(applicationAttempt.getFinalApplicationStatus()); } /** @@ -259,6 +266,7 @@ private void testAppAttemptScheduledState() { assertNull(applicationAttempt.getMasterContainer()); assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); assertEquals(0, applicationAttempt.getRanNodes().size()); + assertNull(applicationAttempt.getFinalApplicationStatus()); // Check events verify(application).handle(any(RMAppEvent.class)); @@ -299,6 +307,49 @@ private void testAppAttemptFailedState(Container container, verify(application, times(2)).handle(any(RMAppFailedAttemptEvent.class)); } + /** + * {@link RMAppAttemptState#LAUNCH} + */ + private void testAppAttemptLaunchedState(Container container) { + assertEquals(RMAppAttemptState.LAUNCHED, + applicationAttempt.getAppAttemptState()); + assertEquals(container, applicationAttempt.getMasterContainer()); + + // TODO - need to add more checks relevant to this state + } + + /** + * {@link RMAppAttemptState#RUNNING} + */ + private void testAppAttemptRunningState(Container container, + String host, int rpcPort, String trackingUrl) { + assertEquals(RMAppAttemptState.RUNNING, + applicationAttempt.getAppAttemptState()); + assertEquals(container, applicationAttempt.getMasterContainer()); + assertEquals(host, applicationAttempt.getHost()); + assertEquals(rpcPort, applicationAttempt.getRpcPort()); + assertEquals(trackingUrl, applicationAttempt.getTrackingUrl()); + + // TODO - need to add more checks relevant to this state + } + + /** + * {@link RMAppAttemptState#FINISHED} + */ + private void testAppAttemptFinishedState(Container container, + FinalApplicationStatus finalStatus, + String trackingUrl, + String diagnostics) { + assertEquals(RMAppAttemptState.FINISHED, + applicationAttempt.getAppAttemptState()); + assertEquals(diagnostics, applicationAttempt.getDiagnostics()); + assertEquals(trackingUrl, applicationAttempt.getTrackingUrl()); + assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); + assertEquals(container, applicationAttempt.getMasterContainer()); + assertEquals(finalStatus, applicationAttempt.getFinalApplicationStatus()); + } + + private void submitApplicationAttempt() { ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId(); applicationAttempt.handle( @@ -339,6 +390,27 @@ private Container allocateApplicationAttempt() { return container; } + + private void launchApplicationAttempt(Container container) { + applicationAttempt.handle( + new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(), + RMAppAttemptEventType.LAUNCHED)); + + testAppAttemptLaunchedState(container); + } + + private void runApplicationAttempt(Container container, + String host, + int rpcPort, + String trackingUrl) { + applicationAttempt.handle( + new RMAppAttemptRegistrationEvent( + applicationAttempt.getAppAttemptId(), + host, rpcPort, trackingUrl)); + + 
testAppAttemptRunningState(container, host, rpcPort, trackingUrl); + } + @Test public void testNewToKilled() { @@ -400,4 +472,37 @@ public void testAllocatedToFailed() { testAppAttemptFailedState(amContainer, diagnostics); } + @Test + public void testUnregisterToKilledFinish() { + Container amContainer = allocateApplicationAttempt(); + launchApplicationAttempt(amContainer); + runApplicationAttempt(amContainer, "host", 9999, "oldtrackingurl"); + String trackingUrl = "newtrackingurl"; + String diagnostics = "Killed by user"; + FinalApplicationStatus finalStatus = FinalApplicationStatus.KILLED; + applicationAttempt.handle( + new RMAppAttemptUnregistrationEvent( + applicationAttempt.getAppAttemptId(), + trackingUrl, finalStatus, diagnostics)); + testAppAttemptFinishedState(amContainer, finalStatus, + trackingUrl, diagnostics); + } + + + @Test + public void testUnregisterToSuccessfulFinish() { + Container amContainer = allocateApplicationAttempt(); + launchApplicationAttempt(amContainer); + runApplicationAttempt(amContainer, "host", 9999, "oldtrackingurl"); + String trackingUrl = "mytrackingurl"; + String diagnostics = "Successful"; + FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED; + applicationAttempt.handle( + new RMAppAttemptUnregistrationEvent( + applicationAttempt.getAppAttemptId(), + trackingUrl, finalStatus, diagnostics)); + testAppAttemptFinishedState(amContainer, finalStatus, + trackingUrl, diagnostics); + } + } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java index 0254c195df..fc326e9b74 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java @@ -25,7 +25,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import static org.apache.hadoop.test.MetricsAsserts.*; import static org.apache.hadoop.test.MockitoMaker.*; -import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resource; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java index e0583a2007..bfea484477 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -17,14 +17,20 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.webapp; +import java.io.IOException; import java.io.PrintWriter; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodesPage.NodesBlock; import org.apache.hadoop.yarn.webapp.test.WebAppTests; import org.junit.Test; import org.mockito.Mockito; +import com.google.inject.Binder; +import com.google.inject.Injector; +import com.google.inject.Module; + /** * This tests the NodesPage block table that it should contain the table body * data for all the columns in the table as specified in the header. @@ -33,23 +39,36 @@ public class TestNodesPage { @Test public void testNodesBlockRender() throws Exception { - int numberOfRacks = 2; - int numberOfNodesPerRack = 2; + final int numberOfRacks = 2; + final int numberOfNodesPerRack = 2; // Number of Actual Table Headers for NodesPage.NodesBlock might change in // future. In that case this value should be adjusted to the new value. - int numberOfActualTableHeaders = 7; + final int numberOfThInMetricsTable = 9; + final int numberOfActualTableHeaders = 10; - PrintWriter writer = WebAppTests.testBlock( - NodesBlock.class, - RMContext.class, - TestRMWebApp.mockRMContext(3, numberOfRacks, numberOfNodesPerRack, - 8 * TestRMWebApp.GiB)).getInstance(PrintWriter.class); + Injector injector = WebAppTests.createMockInjector(RMContext.class, + TestRMWebApp.mockRMContext(3, numberOfRacks, numberOfNodesPerRack, 8*TestRMWebApp.GiB), + new Module() { + @Override + public void configure(Binder binder) { + try { + binder.bind(ResourceManager.class).toInstance(TestRMWebApp.mockRm(3, + numberOfRacks, numberOfNodesPerRack, 8*TestRMWebApp.GiB)); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + }); + injector.getInstance(NodesBlock.class).render(); + PrintWriter writer = injector.getInstance(PrintWriter.class); + WebAppTests.flushOutput(injector); - Mockito.verify(writer, Mockito.times(numberOfActualTableHeaders)).print( + Mockito.verify(writer, Mockito.times(numberOfActualTableHeaders + + numberOfThInMetricsTable)).print( " getRMNodes() { } public static ResourceManager mockRm(int apps, int racks, int nodes, - int mbsPerNode) - throws Exception { + int mbsPerNode) throws IOException { ResourceManager rm = mock(ResourceManager.class); RMContext rmContext = mockRMContext(apps, racks, nodes, mbsPerNode); @@ -110,7 +132,7 @@ public static ResourceManager mockRm(int apps, int racks, int nodes, return rm; } - public static CapacityScheduler mockCapacityScheduler() throws Exception { + public static CapacityScheduler mockCapacityScheduler() throws IOException { // stolen from TestCapacityScheduler CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration(); setupQueueConfiguration(conf); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerTokenSecretManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerTokenSecretManager.java index 1b681628c9..e4178300c1 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerTokenSecretManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerTokenSecretManager.java @@ -209,9 +209,6 @@ public void test() throws IOException, InterruptedException { appToken.setService(new Text(schedulerAddressString)); currentUser.addToken(appToken); - conf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - SchedulerSecurityInfo.class, SecurityInfo.class); AMRMProtocol scheduler = currentUser.doAs(new PrivilegedAction() { @Override @@ -271,9 +268,6 @@ public AMRMProtocol run() { containerToken.getKind()), new Text( containerToken.getService())); currentUser.addToken(token); - conf.setClass( - YarnConfiguration.YARN_SECURITY_INFO, - ContainerManagerSecurityInfo.class, SecurityInfo.class); currentUser.doAs(new PrivilegedAction() { @Override public Void run() { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/pom.xml new file mode 100644 index 0000000000..83d7c29a28 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/pom.xml @@ -0,0 +1,31 @@ + + + + + hadoop-yarn + org.apache.hadoop + ${yarn.version} + + 4.0.0 + org.apache.hadoop + hadoop-yarn-site + hadoop-yarn-site + + + ${project.artifact.file} + ${project.parent.parent.basedir} + + + diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/CapacityScheduler.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/CapacityScheduler.apt.vm new file mode 100644 index 0000000000..0f1c3e406a --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/CapacityScheduler.apt.vm @@ -0,0 +1,309 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Hadoop Map Reduce Next Generation-${project.version} - Capacity Scheduler + --- + --- + ${maven.build.timestamp} + +Hadoop MapReduce Next Generation - Capacity Scheduler + + \[ {{{./index.html}Go Back}} \] + +%{toc|section=1|fromDepth=0} + + +* {Purpose} + + This document describes the <<>>, a pluggable scheduler + for Hadoop which allows multiple tenants to securely share a large cluster + such that their applications are allocated resources in a timely manner under + constraints of allocated capacities. + +* {Overview} + + The <<>> is designed to run Hadoop applications in a + shared, multi-tenant cluster in an operator-friendly manner while maximizing + the throughput and the utilization of the cluster. + + Traditionally each organization has its own private set of compute resources + that have sufficient capacity to meet the organization's SLA under peak or + near peak conditions. This generally leads to poor average utilization and + the overhead of managing multiple independent clusters, one per organization.
+ Sharing clusters between organizations is a cost-effective way of running + large Hadoop installations since this allows them to reap benefits of + economies of scale without creating private clusters. However, organizations + are concerned about sharing a cluster because they are worried about others + using the resources that are critical for their SLAs. + + The <<>> is designed to allow sharing a large cluster while + giving each organization capacity guarantees. The central idea is + that the available resources in the Hadoop cluster are shared among multiple + organizations who collectively fund the cluster based on their computing + needs. There is an added benefit that an organization can access + any excess capacity not being used by others. This provides elasticity for + the organizations in a cost-effective manner. + + Sharing clusters across organizations necessitates strong support for + multi-tenancy since each organization must be guaranteed capacity and + safeguards to ensure the shared cluster is impervious to a single rogue + application or user or sets thereof. The <<>> provides a + stringent set of limits to ensure that a single application or user or queue + cannot consume a disproportionate amount of resources in the cluster. Also, the + <<>> provides limits on initialized/pending applications + from a single user and queue to ensure fairness and stability of the cluster. + + The primary abstraction provided by the <<>> is the concept + of . These queues are typically set up by administrators to reflect the + economics of the shared cluster. + + To provide further control and predictability on sharing of resources, the + <<>> supports to ensure + resources are shared among the sub-queues of an organization before other + queues are allowed to use free resources, thereby providing + for sharing free resources among applications of a given organization. + +* {Features} + + The <<>> supports the following features: + + * Hierarchical Queues - A hierarchy of queues is supported to ensure resources + are shared among the sub-queues of an organization before other + queues are allowed to use free resources, thereby providing more control + and predictability. + + * Capacity Guarantees - Queues are allocated a fraction of the capacity of the + grid in the sense that a certain capacity of resources will be at their + disposal. All applications submitted to a queue will have access to the + capacity allocated to the queue. Administrators can configure soft limits and + optional hard limits on the capacity allocated to each queue. + + * Security - Each queue has strict ACLs which control which users can submit + applications to individual queues. Also, there are safeguards to ensure + that users cannot view and/or modify applications from other users. + Also, per-queue and system administrator roles are supported. + + * Elasticity - Free resources can be allocated to any queue beyond its + capacity. When there is demand for these resources from queues running below + capacity at a future point in time, as tasks scheduled on these resources + complete, they will be assigned to applications on queues running below the + capacity. This ensures that resources are available in a predictable and + elastic manner to queues, thus preventing artificial silos of resources in + the cluster, which helps utilization.
+ + * Multi-tenancy - A comprehensive set of limits is provided to prevent a + single application, user, or queue from monopolizing resources of the queue + or the cluster as a whole, to ensure that the cluster isn't overwhelmed. + + * Operability + + * Runtime Configuration - The queue definitions and properties such as + capacity, ACLs can be changed, at runtime, by administrators in a secure + manner to minimize disruption to users. Also, a console is provided for + users and administrators to view current allocation of resources to + various queues in the system. Administrators can also + at runtime. + + * Drain applications - Administrators can queues + at runtime to ensure that while existing applications run to completion, + no new applications can be submitted. If a queue is in <<>> + state, new applications cannot be submitted to or + . Existing applications continue to completion, + thus the queue can be gracefully. Administrators can also + the stopped queues. + + * Resource-based Scheduling - Support for resource-intensive applications, + wherein an application can optionally specify higher resource requirements + than the default, thereby accommodating applications with differing resource + requirements. Currently, memory is the only resource requirement supported. + + [] + +* {Configuration} + + * Setting up <<>> to use <<>> + + To configure the <<>> to use the <<>>, set + the following property in the <>: + +*--------------------------------------+--------------------------------------+ +|| Property || Value | +*--------------------------------------+--------------------------------------+ +| <<>> | | +| | <<>> | +*--------------------------------------------+--------------------------------------------+ + + * Setting up + + <> is the configuration file for the + <<>>. + + The <<>> has a pre-defined queue called . All + queues in the system are children of the root queue. + + Further queues can be set up by configuring + <<>> with a list of comma-separated + child queues. + + The configuration for <<>> uses a concept called + to configure the hierarchy of queues. The is the + full path of the queue's hierarchy, starting at , with . (dot) as the + delimiter. + + A given queue's children can be defined with the configuration knob: + <<.queues>>> + + Here is an example with three top-level child-queues <<>>, <<>> and + <<>> and some sub-queues for <<>> and <<>>: + +---- + + yarn.scheduler.capacity.root.queues + a,b,c + The queues at this level (root is the root queue). + + + + + yarn.scheduler.capacity.root.a.queues + a1,a2 + The queues at this level (root is the root queue). + + + + + yarn.scheduler.capacity.root.b.queues + b1,b2,b3 + The queues at this level (root is the root queue). + + +---- + + * Queue Properties + + * Resource Allocation + +*--------------------------------------+--------------------------------------+ +|| Property || Description | +*--------------------------------------+--------------------------------------+ +| <<.capacity>>> | | +| | Queue in percentage (%). | +| | The sum of capacities for all queues, at each level, should be less than | +| | or equal to 100. | +| | Applications in the queue may consume more resources than the queue's | +| | capacity if there are free resources, providing elasticity. | +*--------------------------------------+--------------------------------------+ +| <<.maximum-capacity>>> | | +| | Maximum queue capacity in percentage (%). | +| | This limits the for applications in the queue.
| +*--------------------------------------+--------------------------------------+ +| <<.minimum-user-limit-percent>>> | | +| | Each queue enforces a limit on the percentage of resources allocated to a | +| | user at any given time, if there is demand for resources. The user limit | +| | can vary between a minimum and maximum value. The former depends on the | +| | number of users who have submitted applications, and the latter is set to | +| | this property value. For e.g., suppose the value of this property is 25. | +| | If two users have submitted applications to a queue, no single user can | +| | use more than 50% of the queue resources. If a third user submits an | +| | application, no single user can use more than 33% of the queue resources. | +| | With 4 or more users, no user can use more than 25% of the queues | +| | resources. A value of 100 implies no user limits are imposed. | +*--------------------------------------+--------------------------------------+ +| <<.user-limit-factor>>> | | +| | The multiple of the queue capacity which can be configured to allow a | +| | single user to acquire more resources. By default this is set to 1 which | +| | ensures that a single user can never take more than the queue's configured | +| | capacity irrespective of how idle th cluster is. | +*--------------------------------------+--------------------------------------+ + + * Running and Pending Application Limits + + + The <<>> supports the following parameters to control + the runnign and pending applications: + + +*--------------------------------------+--------------------------------------+ +|| Property || Description | +*--------------------------------------+--------------------------------------+ +| <<>> | | +| | Maximum number of jobs in the system which can be concurently active | +| | both running and pending. Limits on each queue are directly proportional | +| | to their queue capacities. | +*--------------------------------------+--------------------------------------+ +| yarn.scheduler.capacity.maximum-am-resource-percent | | +| | Maximum percent of resources in the cluster which can be used to run | +| | application masters - controls number of concurrent running applications. | +*--------------------------------------+--------------------------------------+ + + * Queue Administration & Permissions + + The <<>> supports the following parameters to + the administer the queues: + + +*--------------------------------------+--------------------------------------+ +|| Property || Description | +*--------------------------------------+--------------------------------------+ +| <<.state>>> | | +| | The of the queue. Can be one of <<>> or <<>>. | +| | If a queue is in <<>> state, new applications cannot be | +| | submitted to or . | +| | Thus, if the queue is <<>> no applications can be | +| | submitted to the entire cluster. | +| | Existing applications continue to completion, thus the queue can be +| | gracefully. | +*--------------------------------------+--------------------------------------+ +| <<.acl_submit_jobs>>> | | +| | The which controls who can jobs to the given queue. | +| | If the given user/group has necessary ACLs on the given queue or | +| | they can submit jobs. | +*--------------------------------------+--------------------------------------+ +| <<.acl_administer_jobs>>> | | +| | The which controls who can jobs on the given queue. | +| | If the given user/group has necessary ACLs on the given queue or | +| | they can administer jobs. 
| +*--------------------------------------+--------------------------------------+ + + An is of the form , , . + The special value of <<*>> implies . The special value of + implies . + + * Reviewing the configuration of the CapacityScheduler + + Once the installation and configuration is completed, you can review it + after starting the YARN cluster from the web-ui. + + * Start the YARN cluster in the normal manner. + + * Open the <<>> web UI. + + * The web-page should show the resource usages of individual + queues. + + [] + +* {Changing Queue Configuration} + + Changing queue properties and adding new queues is very simple. You need to + edit <> and run . + +---- +$ vi $HADOOP_CONF_DIR/capacity-scheduler.xml +$ $YARN_HOME/bin/rmadmin -refreshQueues +---- + + Queues cannot be , only addition of new queues is supported - + the updated queue configuration should be a valid one i.e. queue-capacity at + each should be equal to 100%. + \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm similarity index 100% rename from hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/WritingYarnApplications.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/WritingYarnApplications.apt.vm new file mode 100644 index 0000000000..fd9e48ee94 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/WritingYarnApplications.apt.vm @@ -0,0 +1,781 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Hadoop Map Reduce Next Generation-${project.version} - Writing YARN + Applications + --- + --- + ${maven.build.timestamp} + +Hadoop MapReduce Next Generation - Writing YARN Applications + + \[ {{{./index.html}Go Back}} \] + +%{toc|section=1|fromDepth=1} + +* Purpose + + This document describes, at a high-level, the way to implement new + Applications for YARN. + +* Concepts and Flow + + The general concept is that an 'Application Submission Client' submits an + 'Application' to the YARN Resource Manager. The client communicates with the + ResourceManager using the 'ClientRMProtocol' to first acquire a new + 'ApplicationId' if needed via ClientRMProtocol#getNewApplication and then + submit the 'Application' to be run via ClientRMProtocol#submitApplication. As + part of the ClientRMProtocol#submitApplication call, the client needs to + provide sufficient information to the ResourceManager to 'launch' the + application's first container i.e. the ApplicationMaster. 
+ You need to provide information such as the details about the local + files/jars that need to be available for your application to run, the actual + command that needs to be executed (with the necessary command line arguments), + any Unix environment settings (optional), etc. Effectively, you need to + describe the Unix process(es) that need to be launched for your + ApplicationMaster. + + The YARN ResourceManager will then launch the ApplicationMaster (as specified) + on an allocated container. The ApplicationMaster is then expected to + communicate with the ResourceManager using the 'AMRMProtocol'. Firstly, the + ApplicationMaster needs to register itself with the ResourceManager. To + complete the task assigned to it, the ApplicationMaster can then request + and receive containers via AMRMProtocol#allocate. After a container is + allocated to it, the ApplicationMaster communicates with the NodeManager using + ContainerManager#startContainer to launch the container for its task. As part + of launching this container, the ApplicationMaster has to specify the + ContainerLaunchContext which, similar to the ApplicationSubmissionContext, + has the launch information such as command line specification, environment, + etc. Once the task is completed, the ApplicationMaster has to signal the + ResourceManager of its completion via AMRMProtocol#finishApplicationMaster. + + Meanwhile, the client can monitor the application's status by querying the + ResourceManager or by directly querying the ApplicationMaster if it supports + such a service. If needed, it can also kill the application via + ClientRMProtocol#forceKillApplication. + +* Interfaces + + The interfaces you'd most likely be concerned with are: + + * ClientRMProtocol - Client\<--\>ResourceManager\ + The protocol for a client that wishes to communicate with the + ResourceManager to launch a new application (i.e. the ApplicationMaster), + check on the status of the application or kill the application. For example, + a job-client (a job launching program from the gateway) would use this + protocol. + + * AMRMProtocol - ApplicationMaster\<--\>ResourceManager\ + The protocol used by the ApplicationMaster to register/unregister itself + to/from the ResourceManager as well as to request for resources from the + Scheduler to complete its tasks. + + * ContainerManager - ApplicationMaster\<--\>NodeManager\ + The protocol used by the ApplicationMaster to talk to the NodeManager to + start/stop containers and get status updates on the containers if needed. + +* Writing a Simple Yarn Application + +** Writing a simple Client + + * The first step for a client is to connect to the + ResourceManager or, to be more specific, the ApplicationsManager (ASM) + interface of the ResourceManager. + ++---+ + ClientRMProtocol applicationsManager; + YarnConfiguration yarnConf = new YarnConfiguration(conf); + InetSocketAddress rmAddress = + NetUtils.createSocketAddr(yarnConf.get( + YarnConfiguration.RM_ADDRESS, + YarnConfiguration.DEFAULT_RM_ADDRESS)); + LOG.info("Connecting to ResourceManager at " + rmAddress); + Configuration appsManagerServerConf = new Configuration(conf); + appsManagerServerConf.setClass( + YarnConfiguration.YARN_SECURITY_INFO, + ClientRMSecurityInfo.class, SecurityInfo.class); + applicationsManager = ((ClientRMProtocol) rpc.getProxy( + ClientRMProtocol.class, rmAddress, appsManagerServerConf)); ++---+ + + * Once a handle is obtained to the ASM, the client needs to request the + ResourceManager for a new ApplicationId.
+ ++---+ + GetNewApplicationRequest request = + Records.newRecord(GetNewApplicationRequest.class); + GetNewApplicationResponse response = + applicationsManager.getNewApplication(request); + LOG.info("Got new ApplicationId=" + response.getApplicationId()); ++---+ + + * The response from the ASM for a new application also contains information + about the cluster such as the minimum/maximum resource capabilities of the + cluster. This is required to ensure that you can correctly set the + specifications of the container in which the ApplicationMaster would be + launched. Please refer to GetNewApplicationResponse for more details. + + * The main crux of a client is to set up the ApplicationSubmissionContext + which defines all the information needed by the ResourceManager to launch + the ApplicationMaster. A client needs to set the following into the context: + + * Application Info: id, name + + * Queue, Priority info: Queue to which the application will be submitted, + the priority to be assigned for the application. + + * User: The user submitting the application + + * ContainerLaunchContext: The information defining the container in which + the ApplicationMaster will be launched and run. The + ContainerLaunchContext, as mentioned previously, defines all the required + information needed to run the ApplicationMaster such as the local + resources (binaries, jars, files etc.), security tokens, environment + settings (CLASSPATH etc.) and the command to be executed. + + [] + ++---+ + // Create a new ApplicationSubmissionContext + ApplicationSubmissionContext appContext = + Records.newRecord(ApplicationSubmissionContext.class); + // set the ApplicationId + appContext.setApplicationId(appId); + // set the application name + appContext.setApplicationName(appName); + + // Create a new container launch context for the AM's container + ContainerLaunchContext amContainer = + Records.newRecord(ContainerLaunchContext.class); + + // Define the local resources required + Map localResources = + new HashMap(); + // Let's assume the jar we need for our ApplicationMaster is available in + // HDFS at a certain known path to us and we want to make it available to + // the ApplicationMaster in the launched container + Path jarPath; // <- known path to jar file + FileStatus jarStatus = fs.getFileStatus(jarPath); + LocalResource amJarRsrc = Records.newRecord(LocalResource.class); + // Set the type of resource - file or archive + // archives are untarred at the destination by the framework + amJarRsrc.setType(LocalResourceType.FILE); + // Set visibility of the resource + // Setting to most private option i.e. this file will only + // be visible to this instance of the running application + amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); + // Set the location of resource to be copied over into the + // working directory + amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(jarPath)); + // Set timestamp and length of file so that the framework + // can do basic sanity checks for the local resource + // after it has been copied over to ensure it is the same + // resource the client intended to use with the application + amJarRsrc.setTimestamp(jarStatus.getModificationTime()); + amJarRsrc.setSize(jarStatus.getLen()); + // The framework will create a symlink called AppMaster.jar in the + // working directory that will be linked back to the actual file. + // The ApplicationMaster, if it needs to reference the jar file, would + // need to use the symlink filename.
+ localResources.put("AppMaster.jar", amJarRsrc); + // Set the local resources into the launch context + amContainer.setLocalResources(localResources); + + // Set up the environment needed for the launch context + Map env = new HashMap(); + // For example, we could set up the classpath needed. + // Assuming our classes or jars are available as local resources in the + // working directory from which the command will be run, we need to append + // "." to the path. + // By default, all the hadoop specific classpaths will already be available + // in $CLASSPATH, so we should be careful not to overwrite it. + String classPathEnv = "$CLASSPATH:./*:"; + env.put("CLASSPATH", classPathEnv); + amContainer.setEnvironment(env); + + // Construct the command to be executed on the launched container + String command = + "${JAVA_HOME}/bin/java" + + " MyAppMaster" + + " arg1 arg2 arg3" + + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"; + + List commands = new ArrayList(); + commands.add(command); + // add additional commands if needed + + // Set the command array into the container spec + amContainer.setCommands(commands); + + // Define the resource requirements for the container + // For now, YARN only supports memory so we set the memory + // requirements. + // If the process takes more than its allocated memory, it will + // be killed by the framework. + // Memory being requested for should be less than max capability + // of the cluster and all asks should be a multiple of the min capability. + Resource capability = Records.newRecord(Resource.class); + capability.setMemory(amMemory); + amContainer.setResource(capability); + + // Set the container launch context into the ApplicationSubmissionContext + appContext.setAMContainerSpec(amContainer); ++---+ + + * After the setup process is complete, the client is finally ready to submit + the application to the ASM. + ++---+ + // Create the request to send to the ApplicationsManager + SubmitApplicationRequest appRequest = + Records.newRecord(SubmitApplicationRequest.class); + appRequest.setApplicationSubmissionContext(appContext); + + // Submit the application to the ApplicationsManager + // Ignore the response as either a valid response object is returned on + // success or an exception is thrown to denote the failure + applicationsManager.submitApplication(appRequest); ++---+ + + * At this point, the ResourceManager will have accepted the application and + in the background, will go through the process of allocating a container + with the required specifications and then eventually setting up and + launching the ApplicationMaster on the allocated container. + + * There are multiple ways a client can track progress of the actual task. + + * It can communicate with the ResourceManager and request a report of + the application via ClientRMProtocol#getApplicationReport. + ++-----+ + GetApplicationReportRequest reportRequest = + Records.newRecord(GetApplicationReportRequest.class); + reportRequest.setApplicationId(appId); + GetApplicationReportResponse reportResponse = + applicationsManager.getApplicationReport(reportRequest); + ApplicationReport report = reportResponse.getApplicationReport(); ++-----+ + + The ApplicationReport received from the ResourceManager consists of the following: + + * General application information: ApplicationId, queue to which the + application was submitted, user who submitted the application and the + start time for the application.
+ + * ApplicationMaster details: the host on which the ApplicationMaster is + running, the rpc port (if any) on which it is listening for requests + from clients and a token that the client needs to communicate with + the ApplicationMaster. + + * Application tracking information: If the application supports some + form of progress tracking, it can set a tracking url which is + available via ApplicationReport#getTrackingUrl that a client can look + at to monitor progress. + + * ApplicationStatus: The state of the application as seen by the + ResourceManager is available via + ApplicationReport#getYarnApplicationState. If the + YarnApplicationState is set to FINISHED, the client should refer to + ApplicationReport#getFinalApplicationStatus to check for the actual + success/failure of the application task itself. In case of failures, + ApplicationReport#getDiagnostics may be useful to shed some more + light on the failure. + + * If the ApplicationMaster supports it, a client can directly query the + ApplicationMaster itself for progress updates via the host:rpcport + information obtained from the ApplicationReport. It can also use the + tracking url obtained from the report if available. + + * In certain situations, if the application is taking too long or due to + other factors, the client may wish to kill the application. The + ClientRMProtocol supports the forceKillApplication call that allows a + client to send a kill signal to the ApplicationMaster via the + ResourceManager. An ApplicationMaster, if so designed, may also support an + abort call via its rpc layer that a client may be able to leverage. + ++---+ + KillApplicationRequest killRequest = + Records.newRecord(KillApplicationRequest.class); + killRequest.setApplicationId(appId); + applicationsManager.forceKillApplication(killRequest); ++---+ + +** Writing an ApplicationMaster + + * The ApplicationMaster is the actual owner of the job. It will be launched + by the ResourceManager and via the client will be provided all the necessary + information and resources about the job that it has been tasked with to + oversee and complete. + + * As the ApplicationMaster is launched within a container that may (likely + will) be sharing a physical host with other containers, given the + multi-tenancy nature, amongst other issues, it cannot make any assumptions + about things like pre-configured ports that it can listen on. + + * All interactions with the ResourceManager require an ApplicationAttemptId + (there can be multiple attempts per application in case of failures). When + the ApplicationMaster starts up, the ApplicationAttemptId associated with + this particular instance will be set in the environment. There are helper + APIs to convert the value obtained from the environment into an + ApplicationAttemptId object. + ++---+ + Map envs = System.getenv(); + ApplicationAttemptId appAttemptID = + Records.newRecord(ApplicationAttemptId.class); + if (!envs.containsKey(ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV)) { + // app attempt id should always be set in the env by the framework + throw new IllegalArgumentException( + "ApplicationAttemptId not set in the environment"); + } + appAttemptID = + ConverterUtils.toApplicationAttemptId( + envs.get(ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV)); ++---+ + + * After an ApplicationMaster has initialized itself completely, it needs to + register with the ResourceManager via + AMRMProtocol#registerApplicationMaster.
+ The ApplicationMaster always
+ communicates via the Scheduler interface of the ResourceManager.
+
++---+
+ // Connect to the Scheduler of the ResourceManager.
+ YarnConfiguration yarnConf = new YarnConfiguration(conf);
+ InetSocketAddress rmAddress =
+ NetUtils.createSocketAddr(yarnConf.get(
+ YarnConfiguration.RM_SCHEDULER_ADDRESS,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
+ LOG.info("Connecting to ResourceManager at " + rmAddress);
+ AMRMProtocol resourceManager =
+ (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
+
+ // Register the AM with the RM
+ // Set the required info into the registration request:
+ // ApplicationAttemptId,
+ // host on which the app master is running
+ // rpc port on which the app master accepts requests from the client
+ // tracking url for the client to track app master progress
+ RegisterApplicationMasterRequest appMasterRequest =
+ Records.newRecord(RegisterApplicationMasterRequest.class);
+ appMasterRequest.setApplicationAttemptId(appAttemptID);
+ appMasterRequest.setHost(appMasterHostname);
+ appMasterRequest.setRpcPort(appMasterRpcPort);
+ appMasterRequest.setTrackingUrl(appMasterTrackingUrl);
+
+ // The registration response is useful as it provides information about the
+ // cluster.
+ // Similar to the GetNewApplicationResponse in the client, it provides
+ // information about the min/max resource capabilities of the cluster that
+ // would be needed by the ApplicationMaster when requesting for containers.
+ RegisterApplicationMasterResponse response =
+ resourceManager.registerApplicationMaster(appMasterRequest);
++---+
+
+ * The ApplicationMaster has to emit heartbeats to the ResourceManager to keep
+ it informed that the ApplicationMaster is alive and still running. The
+ timeout expiry interval at the ResourceManager is defined by a config
+ setting accessible via YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS with the
+ default being defined by YarnConfiguration.DEFAULT_RM_AM_EXPIRY_INTERVAL_MS.
+ The AMRMProtocol#allocate calls to the ResourceManager count as heartbeats
+ as they also support sending progress update information. Therefore, an
+ allocate call with no containers requested and progress information updated
+ if any is a valid way of making heartbeat calls to the ResourceManager.
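+
+ For illustration only, a heartbeat-only allocate call might look like the
+ following sketch. It reuses the resourceManager proxy and appAttemptID from
+ the registration step above; rmRequestID and currentProgress are placeholder
+ values maintained by the ApplicationMaster (see the full AllocateRequest
+ example further below).
+
++---+
+ // Heartbeat: no new container asks, no releases, just a progress update.
+ AllocateRequest heartbeat = Records.newRecord(AllocateRequest.class);
+ heartbeat.setApplicationAttemptId(appAttemptID);
+ heartbeat.setResponseId(rmRequestID);
+ heartbeat.addAllAsks(new ArrayList<ResourceRequest>());
+ heartbeat.addAllReleases(new ArrayList<ContainerId>());
+ heartbeat.setProgress(currentProgress);
+ AllocateResponse heartbeatResponse = resourceManager.allocate(heartbeat);
++---+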
+
+ * Based on the task requirements, the ApplicationMaster can ask for a set of
+ containers to run its tasks on. The ApplicationMaster has to use the
+ ResourceRequest class to define the following container specifications:
+
+ * Hostname: If containers are required to be hosted on a particular rack or
+ a specific host. '*' is a special value that implies any host will do.
+
+ * Resource capability: Currently, YARN only supports memory based resource
+ requirements so the request should define how much memory is needed. The
+ value is defined in MB and has to be less than the max capability of the
+ cluster and an exact multiple of the min capability.
+
+ * Priority: When asking for sets of containers, an ApplicationMaster may
+ define different priorities for each set. For example, the Map-Reduce
+ ApplicationMaster may assign a higher priority to containers needed
+ for the Map tasks and a lower priority for the Reduce tasks' containers.
+
+ []
+
++---+
+ // Resource Request
+ ResourceRequest rsrcRequest = Records.newRecord(ResourceRequest.class);
+
+ // setup requirements for hosts
+ // whether a particular rack/host is needed
+ // useful for applications that are sensitive
+ // to data locality
+ rsrcRequest.setHostName("*");
+
+ // set the priority for the request
+ Priority pri = Records.newRecord(Priority.class);
+ pri.setPriority(requestPriority);
+ rsrcRequest.setPriority(pri);
+
+ // Set up resource type requirements
+ // For now, only memory is supported so we set memory requirements
+ Resource capability = Records.newRecord(Resource.class);
+ capability.setMemory(containerMemory);
+ rsrcRequest.setCapability(capability);
+
+ // set no. of containers needed
+ // matching the specifications
+ rsrcRequest.setNumContainers(numContainers);
++---+
+
+ * After defining the container requirements, the ApplicationMaster has to
+ construct an AllocateRequest to send to the ResourceManager.
+ The AllocateRequest consists of:
+
+ * Requested containers: The container specifications and the no. of
+ containers being requested by the ApplicationMaster from the
+ ResourceManager.
+
+ * Released containers: There may be situations when the ApplicationMaster
+ may have requested more containers than it needs or, due to failures,
+ may have decided not to use some of the containers allocated to it. In all
+ such situations, it is beneficial to the cluster if the ApplicationMaster
+ releases these containers back to the ResourceManager so that they can be
+ re-allocated to other applications.
+
+ * ResponseId: The response id that will be sent back in the response from
+ the allocate call.
+
+ * Progress update information: The ApplicationMaster can send its progress
+ update (a value between 0 and 1) to the ResourceManager.
+
+ []
+
++---+
+ List<ResourceRequest> requestedContainers;
+ List<ContainerId> releasedContainers;
+ AllocateRequest req = Records.newRecord(AllocateRequest.class);
+
+ // The response id set in the request will be sent back in
+ // the response so that the ApplicationMaster can
+ // match it to its original ask and act appropriately.
+ req.setResponseId(rmRequestID);
+
+ // Set ApplicationAttemptId
+ req.setApplicationAttemptId(appAttemptID);
+
+ // Add the list of containers being asked for
+ req.addAllAsks(requestedContainers);
+
+ // If the ApplicationMaster has no need for certain
+ // containers due to over-allocation or for any other
+ // reason, it can release them back to the ResourceManager
+ req.addAllReleases(releasedContainers);
+
+ // Assuming the ApplicationMaster can track its progress
+ req.setProgress(currentProgress);
+
+ AllocateResponse allocateResponse = resourceManager.allocate(req);
++---+
+
+ * The AllocateResponse sent back from the ResourceManager provides the
+ following information via the AMResponse object:
+
+ * Reboot flag: For scenarios when the ApplicationMaster may get out of sync
+ with the ResourceManager.
+
+ * Allocated containers: The containers that have been allocated to the
+ ApplicationMaster.
+
+ * Headroom: Headroom for resources in the cluster. Based on this information
+ and knowing its needs, an ApplicationMaster can make intelligent decisions
+ such as re-prioritizing sub-tasks to take advantage of currently allocated
+ containers, bailing out faster if resources are not becoming available
+ etc.
+
+ * Completed containers: Once an ApplicationMaster triggers the launch of an
+ allocated container, it will receive an update from the ResourceManager
+ when the container completes.
+ The ApplicationMaster can look into the
+ status of the completed container and take appropriate actions such as
+ re-trying a particular sub-task in case of a failure.
+
+ []
+
+ One thing to note is that containers will not be immediately allocated to
+ the ApplicationMaster. This does not imply that the ApplicationMaster should
+ keep asking for the pending count of required containers. Once an allocate
+ request has been sent, the ApplicationMaster will eventually be allocated
+ the containers based on cluster capacity, priorities and the scheduling
+ policy in place. The ApplicationMaster should request containers again only
+ if its original estimate changed and it needs additional containers.
+
++---+
+ // Get AMResponse from AllocateResponse
+ AMResponse amResp = allocateResponse.getAMResponse();
+
+ // Retrieve list of allocated containers from the response
+ // and on each allocated container, let's assume we are launching
+ // the same job.
+ List<Container> allocatedContainers = amResp.getAllocatedContainers();
+ for (Container allocatedContainer : allocatedContainers) {
+ LOG.info("Launching shell command on a new container."
+ + ", containerId=" + allocatedContainer.getId()
+ + ", containerNode=" + allocatedContainer.getNodeId().getHost()
+ + ":" + allocatedContainer.getNodeId().getPort()
+ + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress()
+ + ", containerState=" + allocatedContainer.getState()
+ + ", containerResourceMemory="
+ + allocatedContainer.getResource().getMemory());
+
+ // Launch and start the container on a separate thread to keep the main
+ // thread unblocked as all containers may not be allocated at one go.
+ LaunchContainerRunnable runnableLaunchContainer =
+ new LaunchContainerRunnable(allocatedContainer);
+ Thread launchThread = new Thread(runnableLaunchContainer);
+ launchThreads.add(launchThread);
+ launchThread.start();
+ }
+
+ // Check what the current available resources in the cluster are
+ Resource availableResources = amResp.getAvailableResources();
+ // Based on this information, an ApplicationMaster can make appropriate
+ // decisions
+
+ // Check the completed containers
+ // Let's assume we are keeping a count of total completed containers,
+ // containers that failed and ones that completed successfully.
+ List<ContainerStatus> completedContainers =
+ amResp.getCompletedContainersStatuses();
+ for (ContainerStatus containerStatus : completedContainers) {
+ LOG.info("Got container status for containerID="
+ + containerStatus.getContainerId()
+ + ", state=" + containerStatus.getState()
+ + ", exitStatus=" + containerStatus.getExitStatus()
+ + ", diagnostics=" + containerStatus.getDiagnostics());
+
+ int exitStatus = containerStatus.getExitStatus();
+ if (0 != exitStatus) {
+ // container failed
+ // -100 is a special case where the container
+ // was aborted/pre-empted for some reason
+ if (-100 != exitStatus) {
+ // application job on container returned a non-zero exit code
+ // counts as completed
+ numCompletedContainers.incrementAndGet();
+ numFailedContainers.incrementAndGet();
+ }
+ else {
+ // the container was aborted/pre-empted, so the app job did not
+ // complete; we should re-try as the container was lost.
+ // Decrement the requested count so that we ask for an
+ // additional one in the next allocate call.
+ numRequestedContainers.decrementAndGet();
+ // we do not need to release the container as that has already
+ // been done by the ResourceManager/NodeManager.
+ }
+ }
+ else {
+ // container completed successfully
+ numCompletedContainers.incrementAndGet();
+ numSuccessfulContainers.incrementAndGet();
+ }
+ }
++---+
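+
+ The AMResponse also carries the reboot flag mentioned above. A minimal,
+ illustrative check is sketched below, assuming the flag is exposed via a
+ getReboot accessor on AMResponse; the actual recovery strategy is
+ application specific.
+
++---+
+ // If the ResourceManager signals a reboot, the ApplicationMaster is out of
+ // sync with the ResourceManager and should reset its state, for example by
+ // re-registering, or shut down and let a new attempt be started.
+ if (amResp.getReboot()) {
+ LOG.warn("ResourceManager requested a reboot, resyncing state");
+ // application specific recovery goes here
+ }
++---+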
+
+ * After a container has been allocated to the ApplicationMaster, it needs to
+ follow a similar process to the one the Client followed in setting up the
+ ContainerLaunchContext for the eventual task that is going to be running on
+ the allocated Container. Once the ContainerLaunchContext is defined, the
+ ApplicationMaster can then communicate with the ContainerManager to start
+ its allocated container.
+
++---+
+
+ // Assuming an allocated Container obtained from AMResponse
+ Container container;
+ // Connect to ContainerManager on the allocated container
+ String cmIpPortStr = container.getNodeId().getHost() + ":"
+ + container.getNodeId().getPort();
+ InetSocketAddress cmAddress = NetUtils.createSocketAddr(cmIpPortStr);
+ ContainerManager cm =
+ (ContainerManager)rpc.getProxy(ContainerManager.class, cmAddress, conf);
+
+ // Now we set up a ContainerLaunchContext
+ ContainerLaunchContext ctx =
+ Records.newRecord(ContainerLaunchContext.class);
+
+ ctx.setContainerId(container.getId());
+ ctx.setResource(container.getResource());
+
+ try {
+ ctx.setUser(UserGroupInformation.getCurrentUser().getShortUserName());
+ } catch (IOException e) {
+ LOG.info(
+ "Getting current user failed when trying to launch the container", e);
+ }
+
+ // Set the environment
+ Map<String, String> unixEnv;
+ // Set up the required env.
+ // Please note that the launched container does not inherit
+ // the environment of the ApplicationMaster so all the
+ // necessary environment settings will need to be re-setup
+ // for this allocated container.
+ ctx.setEnvironment(unixEnv);
+
+ // Set the local resources
+ Map<String, LocalResource> localResources =
+ new HashMap<String, LocalResource>();
+ // Again, the local resources of the ApplicationMaster are not copied over
+ // by default to the allocated container. Thus, it is the responsibility
+ // of the ApplicationMaster to set up all the necessary local resources
+ // needed by the job that will be executed on the allocated container.
+
+ // Assume that we are executing a shell script on the allocated container
+ // and the shell script's location in the filesystem is known to us.
+ String shellScriptPath;
+ LocalResource shellRsrc = Records.newRecord(LocalResource.class);
+ shellRsrc.setType(LocalResourceType.FILE);
+ shellRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
+ shellRsrc.setResource(
+ ConverterUtils.getYarnUrlFromURI(new URI(shellScriptPath)));
+ shellRsrc.setTimestamp(shellScriptPathTimestamp);
+ shellRsrc.setSize(shellScriptPathLen);
+ localResources.put("MyExecShell.sh", shellRsrc);
+
+ ctx.setLocalResources(localResources);
+
+ // Set the necessary command to execute on the allocated container
+ String command = "/bin/sh ./MyExecShell.sh"
+ + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"
+ + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr";
+
+ List<String> commands = new ArrayList<String>();
+ commands.add(command);
+ ctx.setCommands(commands);
+
+ // Send the start request to the ContainerManager
+ StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
+ startReq.setContainerLaunchContext(ctx);
+ cm.startContainer(startReq);
++---+
+
+ * The ApplicationMaster, as mentioned previously, will get updates of
+ completed containers as part of the response from the AMRMProtocol#allocate
+ calls.
+ It can also monitor
+ its launched containers pro-actively by querying
+ the ContainerManager for the status.
+
++---+
+
+ GetContainerStatusRequest statusReq =
+ Records.newRecord(GetContainerStatusRequest.class);
+ statusReq.setContainerId(container.getId());
+ GetContainerStatusResponse statusResp = cm.getContainerStatus(statusReq);
+ LOG.info("Container Status"
+ + ", id=" + container.getId()
+ + ", status=" + statusResp.getStatus());
++---+
+
+~~** Defining the context in which your code runs
+
+~~*** Container Resource Requests
+
+~~*** Local Resources
+
+~~*** Environment
+
+~~**** Managing the CLASSPATH
+
+~~** Security
+
+* FAQ
+
+** How can I distribute my application's jars to all of the nodes in the YARN
+ cluster that need it?
+
+ You can use the LocalResource to add resources to your application request.
+ This will cause YARN to distribute the resource to the ApplicationMaster node.
+ If the resource is a tgz, zip, or jar - you can have YARN unzip it. Then, all
+ you need to do is add the unzipped folder to your classpath.
+ For example, when creating your application request:
+
++---+
+ File packageFile = new File(packagePath);
+ URL packageUrl = ConverterUtils.getYarnUrlFromPath(
+ FileContext.getFileContext().makeQualified(new Path(packagePath)));
+
+ packageResource.setResource(packageUrl);
+ packageResource.setSize(packageFile.length());
+ packageResource.setTimestamp(packageFile.lastModified());
+ packageResource.setType(LocalResourceType.ARCHIVE);
+ packageResource.setVisibility(LocalResourceVisibility.APPLICATION);
+
+ resource.setMemory(memory);
+ containerCtx.setResource(resource);
+ containerCtx.setCommands(ImmutableList.of(
+ "java -cp './package/*' some.class.to.Run "
+ + "1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout "
+ + "2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));
+ containerCtx.setLocalResources(
+ Collections.singletonMap("package", packageResource));
+ appCtx.setApplicationId(appId);
+ appCtx.setUser(user.getShortUserName());
+ appCtx.setAMContainerSpec(containerCtx);
+ request.setApplicationSubmissionContext(appCtx);
+ applicationsManager.submitApplication(request);
++---+
+
+ As you can see, the setLocalResources command takes a map of names to
+ resources. The name becomes a symlink in your application's cwd, so you can
+ just refer to the artifacts inside by using ./package/*.
+
+ Note: Java's classpath (cp) argument is VERY sensitive.
+ Make sure you get the syntax EXACTLY correct.
+
+ Once your package is distributed to your ApplicationMaster, you'll need to
+ follow the same process whenever your ApplicationMaster starts a new container
+ (assuming you want the resources to be sent to your container). The code for
+ this is the same. You just need to make sure that you give your
+ ApplicationMaster the package path (either HDFS, or local), so that it can
+ send the resource URL along with the container ctx.
+
+** How do I get the ApplicationMaster's ApplicationAttemptId?
+
+ The ApplicationAttemptId will be passed to the ApplicationMaster via the
+ environment and the value from the environment can be converted into an
+ ApplicationAttemptId object via the ConverterUtils helper function.
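+
+ For example, inside the ApplicationMaster (this is the same conversion shown
+ in the ApplicationMaster section above):
+
++---+
+ // The framework sets the attempt id in the ApplicationMaster's environment.
+ ApplicationAttemptId appAttemptId =
+ ConverterUtils.toApplicationAttemptId(
+ System.getenv(ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV));
++---+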
+
+** My container is being killed by the NodeManager
+
+ This is likely due to high memory usage exceeding your requested container
+ memory size. There are a number of reasons that can cause this. First, look
+ at the process tree that the NodeManager dumps when it kills your container.
+ The two things you're interested in are physical memory and virtual memory.
+ If you have exceeded the physical memory limit, your app is using too much
+ physical memory. If you're running a Java app, you can use the hprof profiler
+ to look at what is taking up space in the heap. If you have exceeded virtual
+ memory, things are slightly more complicated.
+
+* Useful Links
+
+ * {{{https://issues.apache.org/jira/secure/attachment/12486023/MapReduce_NextGen_Architecture.pdf}Map Reduce Next Generation Architecture}}
+
+ * {{{http://developer.yahoo.com/blogs/hadoop/posts/2011/03/mapreduce-nextgen-scheduler/}Map Reduce Next Generation Scheduler}}
+
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/index.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/index.apt.vm
new file mode 100644
index 0000000000..ceba4898fe
--- /dev/null
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/index.apt.vm
@@ -0,0 +1,49 @@
+~~ Licensed under the Apache License, Version 2.0 (the "License");
+~~ you may not use this file except in compliance with the License.
+~~ You may obtain a copy of the License at
+~~
+~~ http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License. See accompanying LICENSE file.
+
+ ---
+ Hadoop MapReduce Next Generation ${project.version}
+ ---
+ ---
+ ${maven.build.timestamp}
+
+Hadoop MapReduce Next Generation
+
+* Architecture
+
+ The new architecture, introduced in hadoop-0.23, divides the two major
+ functions of the JobTracker, resource management and job life-cycle management,
+ into separate components.
+
+ The new ResourceManager manages the global assignment of compute resources to
+ applications and the per-application ApplicationMaster manages the
+ application’s scheduling and coordination.
+
+ An application is either a single job in the sense of classic MapReduce jobs
+ or a DAG of such jobs.
+
+ The ResourceManager and per-machine NodeManager daemon, which manages the
+ user processes on that machine, form the computation fabric. The
+ per-application ApplicationMaster is, in effect, a framework specific library
+ and is tasked with negotiating resources from the ResourceManager and working
+ with the NodeManager(s) to execute and monitor the tasks.
+ +* User Documentation + + * {{{./SingleCluster.html}Setting up a Single Node Cluster}} + + * {{{./apidocs/index.html}JavaDocs}} + + * {{{./WritingYarnApplications.html}Writing Yarn Applications}} + + * {{{./CapacityScheduler.html}Capacity Scheduler}} + diff --git a/hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/site.xml similarity index 100% rename from hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/site.xml diff --git a/hadoop-mapreduce-project/hadoop-yarn/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/pom.xml index aad5e4a137..051134904c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/pom.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/pom.xml @@ -424,5 +424,7 @@ hadoop-yarn-api hadoop-yarn-common hadoop-yarn-server + hadoop-yarn-applications + hadoop-yarn-site diff --git a/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm deleted file mode 100644 index db9fe87034..0000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm +++ /dev/null @@ -1,39 +0,0 @@ -~~ Licensed under the Apache License, Version 2.0 (the "License"); -~~ you may not use this file except in compliance with the License. -~~ You may obtain a copy of the License at -~~ -~~ http://www.apache.org/licenses/LICENSE-2.0 -~~ -~~ Unless required by applicable law or agreed to in writing, software -~~ distributed under the License is distributed on an "AS IS" BASIS, -~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -~~ See the License for the specific language governing permissions and -~~ limitations under the License. See accompanying LICENSE file. - - --- - Hadoop MapReduce Next Generation ${project.version} - --- - --- - ${maven.build.timestamp} - -Hadoop MapReduce Next Generation - -* Architecture - - The new architecture introduced in 0.23, divides the two major functions - of the JobTracker, resource management and job scheduling/monitoring, into separate - components. - The new ResourceManager manages the global assignment of compute resources to applications - and the per-application ApplicationMaster manages the application’s scheduling and coordination. - An application is either a single job in the classic MapReduce jobs or a DAG of such jobs. - The ResourceManager and per-machine NodeManager server, which manages the user processes on that - machine, form the computation fabric. The per-application ApplicationMaster is, in effect, a - framework specific library and is tasked with negotiating resources from the ResourceManager - and working with the NodeManager(s) to execute and monitor the tasks. 
- -* User Documentation - - * {{{./SingleCluster.html}SingleCluster}} - - * {{{./apidocs/index.html}JavaDocs}} - diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestUserResolve.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestUserResolve.java index 26d35008bb..8050f33b79 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestUserResolve.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestUserResolve.java @@ -106,7 +106,7 @@ public void testRoundRobinResolver() throws Exception { // Check if the error message is as expected for non existent // user resource file. fs.delete(usersFilePath, false); - String expectedErrorMsg = "File " + userRsrc + " does not exist."; + String expectedErrorMsg = "File " + userRsrc + " does not exist"; validateBadUsersFile(rslv, userRsrc, expectedErrorMsg); // Check if the error message is as expected for empty user resource file diff --git a/hadoop-mapreduce-project/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-mapreduce-project/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java index 27629476d9..a019be7f34 100644 --- a/hadoop-mapreduce-project/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java +++ b/hadoop-mapreduce-project/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java @@ -180,7 +180,9 @@ protected void init() { void preProcessArgs() { verbose_ = false; - addTaskEnvironment_ = ""; + // Unset HADOOP_ROOT_LOGGER in case streaming job + // invokes additional hadoop commands. + addTaskEnvironment_ = "HADOOP_ROOT_LOGGER="; } void postProcessArgs() throws IOException { diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobInProgress.java b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobInProgress.java index 0f4a1352ec..c32ff9108d 100644 --- a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobInProgress.java +++ b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobInProgress.java @@ -697,7 +697,8 @@ public synchronized void initTasks() JobInitedEvent jie = new JobInitedEvent( profile.getJobID(), this.launchTime, numMapTasks, numReduceTasks, - JobStatus.getJobRunState(JobStatus.PREP)); + JobStatus.getJobRunState(JobStatus.PREP), + false); jobHistory.logEvent(jie, jobId); diff --git a/hadoop-mapreduce-project/src/packages/deb/hadoop.control/preinst b/hadoop-mapreduce-project/src/packages/deb/hadoop.control/preinst index 4cc3cc4f87..229834640c 100644 --- a/hadoop-mapreduce-project/src/packages/deb/hadoop.control/preinst +++ b/hadoop-mapreduce-project/src/packages/deb/hadoop.control/preinst @@ -15,4 +15,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -/usr/sbin/useradd --comment "Hadoop MapReduce" --shell /bin/bash -M -r --groups hadoop --home /var/lib/hadoop/mapred mapred 2> /dev/null || : +/usr/sbin/useradd --comment "Hadoop MapReduce" -u 202 --shell /bin/bash -M -r --groups hadoop --home /var/lib/hadoop/mapred mapred 2> /dev/null || : diff --git a/hadoop-mapreduce-project/src/packages/deb/init.d/hadoop-historyserver b/hadoop-mapreduce-project/src/packages/deb/init.d/hadoop-historyserver new file mode 100644 index 0000000000..4421f5538e --- /dev/null +++ b/hadoop-mapreduce-project/src/packages/deb/init.d/hadoop-historyserver @@ -0,0 +1,143 @@ +#! 
/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +### BEGIN INIT INFO +# Provides: hadoop-historyserver +# Required-Start: $remote_fs $syslog +# Required-Stop: $remote_fs $syslog +# Default-Start: 2 3 4 5 +# Default-Stop: +# Short-Description: Apache Hadoop Job Tracker server +### END INIT INFO + +set -e + +# /etc/init.d/hadoop-historyserver: start and stop the Apache Hadoop Job History daemon + +test -x /usr/bin/hadoop || exit 0 +( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0 + +umask 022 + +if test -f /etc/default/hadoop-env.sh; then + . /etc/default/hadoop-env.sh +fi + +. /lib/lsb/init-functions + +# Are we running from init? +run_by_init() { + ([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ] +} + +check_for_no_start() { + # forget it if we're trying to start, and /etc/hadoop/hadoop-historyserver_not_to_be_run exists + if [ -e /etc/hadoop/hadoop-historyserver_not_to_be_run ]; then + if [ "$1" = log_end_msg ]; then + log_end_msg 0 + fi + if ! run_by_init; then + log_action_msg "Apache Hadoop Job History server not in use (/etc/hadoop/hadoop-historyserver_not_to_be_run)" + fi + exit 0 + fi +} + +check_privsep_dir() { + # Create the PrivSep empty dir if necessary + if [ ! 
-d ${HADOOP_PID_DIR} ]; then + mkdir -p ${HADOOP_PID_DIR} + chown root:hadoop ${HADOOP_PID_DIR} + chmod 0775 ${HADOOP_PID_DIR} + fi +} + +export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin" +export HADOOP_PREFIX="/usr" + +case "$1" in + start) + check_privsep_dir + check_for_no_start + log_daemon_msg "Starting Apache Hadoop Job History server" "hadoop-historyserver" + if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-historyserver.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start historyserver; then + log_end_msg 0 + else + log_end_msg 1 + fi + ;; + stop) + log_daemon_msg "Stopping Apache Hadoop Job History server" "hadoop-historyserver" + if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-historyserver.pid; then + log_end_msg 0 + else + log_end_msg 1 + fi + ;; + + restart) + check_privsep_dir + log_daemon_msg "Restarting Apache Hadoop Job History server" "hadoop-historyserver" + start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-historyserver.pid + check_for_no_start log_end_msg + if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-historyserver.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start historyserver; then + log_end_msg 0 + else + log_end_msg 1 + fi + ;; + + try-restart) + check_privsep_dir + log_daemon_msg "Restarting Apache Hadoop Job History server" "hadoop-historyserver" + set +e + start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-historyserver.pid + RET="$?" + set -e + case $RET in + 0) + # old daemon stopped + check_for_no_start log_end_msg + if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-historyserver.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start historyserver; then + log_end_msg 0 + else + log_end_msg 1 + fi + ;; + 1) + # daemon not running + log_progress_msg "(not running)" + log_end_msg 0 + ;; + *) + # failed to stop + log_progress_msg "(failed to stop)" + log_end_msg 1 + ;; + esac + ;; + + status) + status_of_proc -p ${HADOOP_PID_DIR}/hadoop-mapred-historyserver.pid ${JAVA_HOME}/bin/java hadoop-historyserver && exit 0 || exit $? + ;; + + *) + log_action_msg "Usage: /etc/init.d/hadoop-historyserver {start|stop|restart|try-restart|status}" + exit 1 +esac + +exit 0 diff --git a/hadoop-mapreduce-project/src/packages/rpm/init.d/hadoop-historyserver b/hadoop-mapreduce-project/src/packages/rpm/init.d/hadoop-historyserver new file mode 100644 index 0000000000..71d1658327 --- /dev/null +++ b/hadoop-mapreduce-project/src/packages/rpm/init.d/hadoop-historyserver @@ -0,0 +1,85 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Starts a Hadoop historyserver +# +# chkconfig: 2345 90 10 +# description: Hadoop historyserver + +source /etc/rc.d/init.d/functions +source /etc/default/hadoop-env.sh + +RETVAL=0 +PIDFILE="${HADOOP_PID_DIR}/hadoop-mapred-historyserver.pid" +desc="Hadoop historyserver daemon" +export HADOOP_PREFIX="/usr" + +start() { + echo -n $"Starting $desc (hadoop-historyserver): " + daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start historyserver + RETVAL=$? + echo + [ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-historyserver + return $RETVAL +} + +stop() { + echo -n $"Stopping $desc (hadoop-historyserver): " + daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop historyserver + RETVAL=$? + sleep 5 + echo + [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-historyserver $PIDFILE +} + +restart() { + stop + start +} + +checkstatus(){ + status -p $PIDFILE ${JAVA_HOME}/bin/java + RETVAL=$? +} + +condrestart(){ + [ -e /var/lock/subsys/hadoop-historyserver ] && restart || : +} + +case "$1" in + start) + start + ;; + stop) + stop + ;; + status) + checkstatus + ;; + restart) + restart + ;; + condrestart) + condrestart + ;; + *) + echo $"Usage: $0 {start|stop|status|restart|condrestart}" + exit 1 +esac + +exit $RETVAL diff --git a/hadoop-mapreduce-project/src/packages/rpm/spec/hadoop-mapred.spec b/hadoop-mapreduce-project/src/packages/rpm/spec/hadoop-mapred.spec index 70dfe9361b..85af48fd89 100644 --- a/hadoop-mapreduce-project/src/packages/rpm/spec/hadoop-mapred.spec +++ b/hadoop-mapreduce-project/src/packages/rpm/spec/hadoop-mapred.spec @@ -142,8 +142,8 @@ mv ${RPM_BUILD_DIR}/%{_final_name}/share/* ${RPM_BUILD_DIR}%{_share_dir} rm -rf ${RPM_BUILD_DIR}/%{_final_name}/etc %pre -getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -r hadoop -/usr/sbin/useradd --comment "Hadoop MapReduce" --shell /bin/bash -M -r --groups hadoop --home %{_var_dir}/mapred mapred 2> /dev/null || : +getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -g 123 -r hadoop +/usr/sbin/useradd --comment "Hadoop MapReduce" -u 202 --shell /bin/bash -M -r --groups hadoop --home %{_var_dir}/mapred mapred 2> /dev/null || : %post bash ${RPM_INSTALL_PREFIX0}/sbin/update-mapred-env.sh \ diff --git a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java index dd2649e6e4..a86f49e5a8 100644 --- a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java +++ b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java @@ -125,10 +125,12 @@ HistoryEvent maybeEmitEvent(ParsedLine line, String jobIDName, String status = line.get("JOB_STATUS"); String totalMaps = line.get("TOTAL_MAPS"); String totalReduces = line.get("TOTAL_REDUCES"); + String uberized = line.get("UBERIZED"); if (launchTime != null && totalMaps != null && totalReduces != null) { return new JobInitedEvent(jobID, Long.parseLong(launchTime), Integer - .parseInt(totalMaps), Integer.parseInt(totalReduces), status); + .parseInt(totalMaps), Integer.parseInt(totalReduces), status, + Boolean.parseBoolean(uberized)); } return null;