diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 09f1c5a2d3..ae495be0e6 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -224,6 +224,10 @@ compile + + org.htrace + htrace-core + org.apache.zookeeper zookeeper diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index 158445f836..2f482c290e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -88,6 +88,7 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; +import org.htrace.Trace; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -694,6 +695,9 @@ private synchronized void setupIOstreams() { if (LOG.isDebugEnabled()) { LOG.debug("Connecting to "+server); } + if (Trace.isTracing()) { + Trace.addTimelineAnnotation("IPC client connecting to " + server); + } short numRetries = 0; Random rand = null; while (true) { @@ -758,6 +762,10 @@ public AuthMethod run() // update last activity time touch(); + if (Trace.isTracing()) { + Trace.addTimelineAnnotation("IPC client connected to " + server); + } + // start the receiver thread after the socket connection has been set // up start(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java index 64615d22f8..0ccdb71d0e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java @@ -48,6 +48,9 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.Time; +import org.htrace.Sampler; +import org.htrace.Trace; +import org.htrace.TraceScope; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.BlockingService; @@ -191,6 +194,16 @@ public Object invoke(Object proxy, Method method, Object[] args) + method.getName() + "]"); } + TraceScope traceScope = null; + // if Tracing is on then start a new span for this rpc. + // guard it in the if statement to make sure there isn't + // any extra string manipulation. + if (Trace.isTracing()) { + traceScope = Trace.startSpan( + method.getDeclaringClass().getCanonicalName() + + "." 
+ method.getName()); + } + RequestHeaderProto rpcRequestHeader = constructRpcRequestHeader(method); if (LOG.isTraceEnabled()) { @@ -212,8 +225,13 @@ public Object invoke(Object proxy, Method method, Object[] args) remoteId + ": " + method.getName() + " {" + e + "}"); } - + if (Trace.isTracing()) { + traceScope.getSpan().addTimelineAnnotation( + "Call got exception: " + e.getMessage()); + } throw new ServiceException(e); + } finally { + if (traceScope != null) traceScope.close(); } if (LOG.isDebugEnabled()) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 24dd0c21b8..021e03537b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -79,6 +79,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.ipc.ProtobufRpcEngine.RpcResponseMessageWrapper; @@ -115,6 +116,10 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; +import org.htrace.Span; +import org.htrace.Trace; +import org.htrace.TraceInfo; +import org.htrace.TraceScope; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ByteString; @@ -506,6 +511,7 @@ public static class Call implements Schedulable { private ByteBuffer rpcResponse; // the response for this call private final RPC.RpcKind rpcKind; private final byte[] clientId; + private final Span traceSpan; // the tracing span on the server side public Call(int id, int retryCount, Writable param, Connection connection) { @@ -515,6 +521,11 @@ public Call(int id, int retryCount, Writable param, public Call(int id, int retryCount, Writable param, Connection connection, RPC.RpcKind kind, byte[] clientId) { + this(id, retryCount, param, connection, kind, clientId, null); + } + + public Call(int id, int retryCount, Writable param, Connection connection, + RPC.RpcKind kind, byte[] clientId, Span span) { this.callId = id; this.retryCount = retryCount; this.rpcRequest = param; @@ -523,6 +534,7 @@ public Call(int id, int retryCount, Writable param, Connection connection, this.rpcResponse = null; this.rpcKind = kind; this.clientId = clientId; + this.traceSpan = span; } @Override @@ -1921,9 +1933,18 @@ private void processRpcRequest(RpcRequestHeaderProto header, RpcErrorCodeProto.FATAL_DESERIALIZING_REQUEST, err); } + Span traceSpan = null; + if (header.hasTraceInfo()) { + // If the incoming RPC included tracing info, always continue the trace + TraceInfo parentSpan = new TraceInfo(header.getTraceInfo().getTraceId(), + header.getTraceInfo().getParentId()); + traceSpan = Trace.startSpan(rpcRequest.toString(), parentSpan).detach(); + } + Call call = new Call(header.getCallId(), header.getRetryCount(), - rpcRequest, this, ProtoUtil.convert(header.getRpcKind()), header - .getClientId().toByteArray()); + rpcRequest, this, ProtoUtil.convert(header.getRpcKind()), + header.getClientId().toByteArray(), traceSpan); + callQueue.put(call); // queue the call; maybe blocked here incRpcCount(); // Increment the rpc count } @@ -2067,6 +2088,7 @@ public void run() { ByteArrayOutputStream buf 
= new ByteArrayOutputStream(INITIAL_RESP_BUF_SIZE); while (running) { + TraceScope traceScope = null; try { final Call call = callQueue.take(); // pop the queue; maybe blocked here if (LOG.isDebugEnabled()) { @@ -2083,6 +2105,10 @@ public void run() { Writable value = null; CurCall.set(call); + if (call.traceSpan != null) { + traceScope = Trace.continueSpan(call.traceSpan); + } + try { // Make the call as the user via Subject.doAs, thus associating // the call with the Subject @@ -2156,9 +2182,22 @@ public Writable run() throws Exception { } catch (InterruptedException e) { if (running) { // unexpected -- log it LOG.info(Thread.currentThread().getName() + " unexpectedly interrupted", e); + if (Trace.isTracing()) { + traceScope.getSpan().addTimelineAnnotation("unexpectedly interrupted: " + + StringUtils.stringifyException(e)); + } } } catch (Exception e) { LOG.info(Thread.currentThread().getName() + " caught an exception", e); + if (Trace.isTracing()) { + traceScope.getSpan().addTimelineAnnotation("Exception: " + + StringUtils.stringifyException(e)); + } + } finally { + if (traceScope != null) { + traceScope.close(); + } + IOUtils.cleanup(LOG, traceScope); } } LOG.debug(Thread.currentThread().getName() + ": exiting"); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java index 04ab4dc269..4b2dfe0de1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java @@ -41,6 +41,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.*; +import org.htrace.Trace; +import org.htrace.TraceScope; /** An RpcEngine implementation for Writable data. */ @InterfaceStability.Evolving @@ -227,9 +229,19 @@ public Object invoke(Object proxy, Method method, Object[] args) if (LOG.isDebugEnabled()) { startTime = Time.now(); } - - ObjectWritable value = (ObjectWritable) - client.call(RPC.RpcKind.RPC_WRITABLE, new Invocation(method, args), remoteId); + TraceScope traceScope = null; + if (Trace.isTracing()) { + traceScope = Trace.startSpan( + method.getDeclaringClass().getCanonicalName() + + "." + method.getName()); + } + ObjectWritable value; + try { + value = (ObjectWritable) + client.call(RPC.RpcKind.RPC_WRITABLE, new Invocation(method, args), remoteId); + } finally { + if (traceScope != null) traceScope.close(); + } if (LOG.isDebugEnabled()) { long callTime = Time.now() - startTime; LOG.debug("Call: " + method.getName() + " " + callTime); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java new file mode 100644 index 0000000000..b8c7b311ff --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.tracing; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.ShutdownHookManager; +import org.htrace.HTraceConfiguration; +import org.htrace.SpanReceiver; +import org.htrace.Trace; + +/** + * This class provides functions for reading the names of SpanReceivers from + * the Hadoop configuration, adding those SpanReceivers to the Tracer, + * and closing those SpanReceivers when appropriate. + * This class does nothing if no SpanReceiver is configured. + */ +@InterfaceAudience.Private +public class SpanReceiverHost { + public static final String SPAN_RECEIVERS_CONF_KEY = "hadoop.trace.spanreceiver.classes"; + private static final Log LOG = LogFactory.getLog(SpanReceiverHost.class); + private Collection<SpanReceiver> receivers = new HashSet<SpanReceiver>(); + private boolean closed = false; + + private static enum SingletonHolder { + INSTANCE; + Object lock = new Object(); + SpanReceiverHost host = null; + } + + public static SpanReceiverHost getInstance(Configuration conf) { + if (SingletonHolder.INSTANCE.host != null) { + return SingletonHolder.INSTANCE.host; + } + synchronized (SingletonHolder.INSTANCE.lock) { + if (SingletonHolder.INSTANCE.host != null) { + return SingletonHolder.INSTANCE.host; + } + SpanReceiverHost host = new SpanReceiverHost(); + host.loadSpanReceivers(conf); + SingletonHolder.INSTANCE.host = host; + ShutdownHookManager.get().addShutdownHook(new Runnable() { + public void run() { + SingletonHolder.INSTANCE.host.closeReceivers(); + } + }, 0); + return SingletonHolder.INSTANCE.host; + } + } + + /** + * Reads the names of classes specified in the + * "hadoop.trace.spanreceiver.classes" property and instantiates and registers + * them with the Tracer as SpanReceivers. + * + * The nullary constructor is called during construction, but if the classes + * specified implement the Configurable interface, setConfiguration() will be + * called on them. This allows SpanReceivers to use values from the Hadoop + * configuration.
+ */ + public void loadSpanReceivers(Configuration conf) { + Class implClass = null; + String[] receiverNames = conf.getTrimmedStrings(SPAN_RECEIVERS_CONF_KEY); + if (receiverNames == null || receiverNames.length == 0) { + return; + } + for (String className : receiverNames) { + className = className.trim(); + try { + implClass = Class.forName(className); + receivers.add(loadInstance(implClass, conf)); + LOG.info("SpanReceiver " + className + " was loaded successfully."); + } catch (ClassNotFoundException e) { + LOG.warn("Class " + className + " cannot be found.", e); + } catch (IOException e) { + LOG.warn("Load SpanReceiver " + className + " failed.", e); + } + } + for (SpanReceiver rcvr : receivers) { + Trace.addReceiver(rcvr); + } + } + + private SpanReceiver loadInstance(Class implClass, Configuration conf) + throws IOException { + SpanReceiver impl; + try { + Object o = ReflectionUtils.newInstance(implClass, conf); + impl = (SpanReceiver)o; + impl.configure(wrapHadoopConf(conf)); + } catch (SecurityException e) { + throw new IOException(e); + } catch (IllegalArgumentException e) { + throw new IOException(e); + } catch (RuntimeException e) { + throw new IOException(e); + } + + return impl; + } + + private static HTraceConfiguration wrapHadoopConf(final Configuration conf) { + return new HTraceConfiguration() { + public static final String HTRACE_CONF_PREFIX = "hadoop."; + + @Override + public String get(String key) { + return conf.get(HTRACE_CONF_PREFIX + key); + } + + @Override + public String get(String key, String defaultValue) { + return conf.get(HTRACE_CONF_PREFIX + key, defaultValue); + } + }; + } + + /** + * Calls close() on all SpanReceivers created by this SpanReceiverHost. + */ + public synchronized void closeReceivers() { + if (closed) return; + closed = true; + for (SpanReceiver rcvr : receivers) { + try { + rcvr.close(); + } catch (IOException e) { + LOG.warn("Unable to close SpanReceiver correctly: " + e.getMessage(), e); + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java index 79f8692842..36b5ff11bc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java @@ -27,6 +27,8 @@ import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.*; import org.apache.hadoop.security.SaslRpcServer.AuthMethod; import org.apache.hadoop.security.UserGroupInformation; +import org.htrace.Span; +import org.htrace.Trace; import com.google.protobuf.ByteString; @@ -165,6 +167,15 @@ public static RpcRequestHeaderProto makeRpcRequestHeader(RPC.RpcKind rpcKind, RpcRequestHeaderProto.Builder result = RpcRequestHeaderProto.newBuilder(); result.setRpcKind(convert(rpcKind)).setRpcOp(operation).setCallId(callId) .setRetryCount(retryCount).setClientId(ByteString.copyFrom(uuid)); + + // Add tracing info if we are currently tracing. 
+ if (Trace.isTracing()) { + Span s = Trace.currentSpan(); + result.setTraceInfo(RPCTraceInfoProto.newBuilder() + .setParentId(s.getSpanId()) + .setTraceId(s.getTraceId()).build()); + } + return result.build(); } } diff --git a/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto index e8c4adac36..c8791508b5 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto @@ -53,6 +53,18 @@ enum RpcKindProto { +/** + * Used to pass through the information necessary to continue + * a trace after an RPC is made. All we need is the traceid + * (so we know the overarching trace this message is a part of), and + * the id of the current span when this message was sent, so we know + * what span caused the new span we will create when this message is received. + */ +message RPCTraceInfoProto { + optional int64 traceId = 1; + optional int64 parentId = 2; +} + message RpcRequestHeaderProto { // the header for the RpcRequest enum OperationProto { RPC_FINAL_PACKET = 0; // The final RPC Packet @@ -67,6 +79,7 @@ message RpcRequestHeaderProto { // the header for the RpcRequest // clientId + callId uniquely identifies a request // retry count, 1 means this is the first retry optional sint32 retryCount = 5 [default = -1]; + optional RPCTraceInfoProto traceInfo = 6; // tracing info } diff --git a/hadoop-common-project/hadoop-common/src/site/apt/Tracing.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/Tracing.apt.vm new file mode 100644 index 0000000000..f777dd23c1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/apt/Tracing.apt.vm @@ -0,0 +1,169 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Hadoop Distributed File System-${project.version} - Enabling Dapper-like Tracing + --- + --- + ${maven.build.timestamp} + +Enabling Dapper-like Tracing in HDFS + +%{toc|section=1|fromDepth=0} + +* {Dapper-like Tracing in HDFS} + +** HTrace + + {{{https://issues.apache.org/jira/browse/HDFS-5274}HDFS-5274}} + added support for tracing requests through HDFS, + using the open source tracing library, {{{https://github.com/cloudera/htrace}HTrace}}. + Setting up tracing is quite simple; however, it requires some very minor changes to your client code. + +** SpanReceivers + + The tracing system works by collecting information in structs called 'Spans'. + It is up to you to choose how you want to receive this information + by implementing the SpanReceiver interface, which defines one method: + ++---- +public void receiveSpan(Span span); ++---- + + Configure what SpanReceivers you'd like to use + by putting a comma-separated list of the fully-qualified class names of + classes implementing SpanReceiver + in the <<<hadoop.trace.spanreceiver.classes>>> property in <<<core-site.xml>>>:
+ ++---- + + hadoop.trace.spanreceiver.classes + org.htrace.impl.LocalFileSpanReceiver + + + hadoop.local-file-span-receiver.path + /var/log/hadoop/htrace.out + ++---- + +** Setting up ZipkinSpanReceiver + + Instead of implementing SpanReceiver by yourself, + you can use <<>> which uses + {{{https://github.com/twitter/zipkin}Zipkin}} + for collecting and dispalying tracing data. + + In order to use <<>>, + you need to download and setup {{{https://github.com/twitter/zipkin}Zipkin}} first. + + you also need to add the jar of <<>> to the classpath of Hadoop on each node. + Here is example setup procedure. + ++---- + $ git clone https://github.com/cloudera/htrace + $ cd htrace/htrace-zipkin + $ mvn compile assembly:single + $ cp target/htrace-zipkin-*-jar-with-dependencies.jar $HADOOP_HOME/share/hadoop/hdfs/lib/ ++---- + + The sample configuration for <<>> is shown below. + By adding these to <<>> of NameNode and DataNodes, + <<>> is initialized on the startup. + You also need this configuration on the client node in addition to the servers. + ++---- + + hadoop.trace.spanreceiver.classes + org.htrace.impl.ZipkinSpanReceiver + + + hadoop.zipkin.collector-hostname + 192.168.1.2 + + + hadoop.zipkin.collector-port + 9410 + ++---- + +** Turning on tracing by HTrace API + + In order to turn on Dapper-like tracing, + you will need to wrap the traced logic with <> as shown below. + When there is running tracing spans, + the tracing information is propagated to servers along with RPC requests. + + In addition, you need to initialize <<>> once per process. + ++---- +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.tracing.SpanReceiverHost; +import org.htrace.Sampler; +import org.htrace.Trace; +import org.htrace.TraceScope; + +... + + SpanReceiverHost.getInstance(new HdfsConfiguration()); + +... + + TraceScope ts = Trace.startSpan("Gets", Sampler.ALWAYS); + try { + ... // traced logic + } finally { + if (ts != null) ts.close(); + } ++---- + +** Sample code for tracing + + The <<>> shown below is the wrapper of FsShell + which start tracing span before invoking HDFS shell command. + ++---- +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FsShell; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.tracing.SpanReceiverHost; +import org.apache.hadoop.util.ToolRunner; +import org.htrace.Sampler; +import org.htrace.Trace; +import org.htrace.TraceScope; + +public class TracingFsShell { + public static void main(String argv[]) throws Exception { + Configuration conf = new Configuration(); + FsShell shell = new FsShell(); + conf.setQuietMode(false); + shell.setConf(conf); + int res = 0; + SpanReceiverHost.init(new HdfsConfiguration()); + TraceScope ts = null; + try { + ts = Trace.startSpan("FsShell", Sampler.ALWAYS); + res = ToolRunner.run(shell, argv); + } finally { + shell.close(); + if (ts != null) ts.close(); + } + System.exit(res); + } +} ++---- + + You can compile and execute this code as shown below. + ++---- +$ javac -cp `hadoop classpath` TracingFsShell.java +$ HADOOP_CLASSPATH=. 
hdfs TracingFsShell -put sample.txt /tmp/ ++---- diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 9b026f2bdb..81eae0ab51 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -181,6 +181,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> xercesImpl compile + + org.htrace + htrace-core + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index df50eabacb..1ec91d005b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -180,6 +180,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.VersionInfo; +import org.apache.hadoop.tracing.SpanReceiverHost; import org.mortbay.util.ajax.JSON; import com.google.common.annotations.VisibleForTesting; @@ -326,6 +327,8 @@ public static InetSocketAddress createSocketAddr(String target) { private boolean isPermissionEnabled; private String dnUserName = null; + private SpanReceiverHost spanReceiverHost; + /** * Create the DataNode given a configuration, an array of dataDirs, * and a namenode proxy @@ -823,6 +826,7 @@ void startDataNode(Configuration conf, this.dataDirs = dataDirs; this.conf = conf; this.dnConf = new DNConf(conf); + this.spanReceiverHost = SpanReceiverHost.getInstance(conf); if (dnConf.maxLockedMemory > 0) { if (!NativeIO.POSIX.getCacheManipulator().verifyCanMlock()) { @@ -1510,6 +1514,9 @@ public void shutdown() { MBeans.unregister(dataNodeInfoBeanName); dataNodeInfoBeanName = null; } + if (this.spanReceiverHost != null) { + this.spanReceiverHost.closeReceivers(); + } if (shortCircuitRegistry != null) shortCircuitRegistry.shutdown(); LOG.info("Shutdown complete."); synchronized(this) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 4072b1720d..bcb5a8697d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -60,6 +60,7 @@ import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; import org.apache.hadoop.ipc.RefreshCallQueueProtocol; import org.apache.hadoop.tools.GetUserMappingsProtocol; +import org.apache.hadoop.tracing.SpanReceiverHost; import org.apache.hadoop.util.ExitUtil.ExitException; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.JvmPauseMonitor; @@ -278,6 +279,7 @@ public long getProtocolVersion(String protocol, private JvmPauseMonitor pauseMonitor; private ObjectName nameNodeStatusBeanName; + private SpanReceiverHost spanReceiverHost; /** * The namenode address that clients will use to access this namenode * or the name service. 
For HA configurations using logical URI, it @@ -586,6 +588,9 @@ protected void initialize(Configuration conf) throws IOException { if (NamenodeRole.NAMENODE == role) { startHttpServer(conf); } + + this.spanReceiverHost = SpanReceiverHost.getInstance(conf); + loadNamesystem(conf); rpcServer = createRpcServer(conf); @@ -822,6 +827,9 @@ public void stop() { MBeans.unregister(nameNodeStatusBeanName); nameNodeStatusBeanName = null; } + if (this.spanReceiverHost != null) { + this.spanReceiverHost.closeReceivers(); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java new file mode 100644 index 0000000000..bb923a2c6b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.tracing; + +import org.apache.commons.lang.RandomStringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.htrace.HTraceConfiguration; +import org.htrace.Sampler; +import org.htrace.Span; +import org.htrace.SpanReceiver; +import org.htrace.Trace; +import org.htrace.TraceScope; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class TestTracing { + + private static Configuration conf; + private static MiniDFSCluster cluster; + private static DistributedFileSystem dfs; + + @Test + public void testSpanReceiverHost() throws Exception { + Configuration conf = new Configuration(); + conf.set(SpanReceiverHost.SPAN_RECEIVERS_CONF_KEY, + SetSpanReceiver.class.getName()); + SpanReceiverHost spanReceiverHost = SpanReceiverHost.getInstance(conf); + } + + @Test + public void testWriteTraceHooks() throws Exception { + long startTime = System.currentTimeMillis(); + TraceScope ts = Trace.startSpan("testWriteTraceHooks", Sampler.ALWAYS); + Path file = new Path("traceWriteTest.dat"); + FSDataOutputStream stream = dfs.create(file); + + for (int i = 0; i < 10; i++) { + byte[] data = RandomStringUtils.randomAlphabetic(102400).getBytes(); + stream.write(data); + } + stream.hflush(); + stream.close(); + long endTime = System.currentTimeMillis(); + ts.close(); + + 
String[] expectedSpanNames = { + "testWriteTraceHooks", + "org.apache.hadoop.hdfs.protocol.ClientProtocol.create", + "org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol.BlockingInterface.create", + "org.apache.hadoop.hdfs.protocol.ClientProtocol.fsync", + "org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol.BlockingInterface.fsync", + "org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol.BlockingInterface.complete" + }; + assertSpanNamesFound(expectedSpanNames); + + // The trace should last about the same amount of time as the test + Map> map = SetSpanReceiver.SetHolder.getMap(); + Span s = map.get("testWriteTraceHooks").get(0); + Assert.assertNotNull(s); + long spanStart = s.getStartTimeMillis(); + long spanEnd = s.getStopTimeMillis(); + Assert.assertTrue(spanStart - startTime < 100); + Assert.assertTrue(spanEnd - endTime < 100); + + // There should only be one trace id as it should all be homed in the + // top trace. + for (Span span : SetSpanReceiver.SetHolder.spans) { + Assert.assertEquals(ts.getSpan().getTraceId(), span.getTraceId()); + } + } + + @Test + public void testWriteWithoutTraceHooks() throws Exception { + Path file = new Path("withoutTraceWriteTest.dat"); + FSDataOutputStream stream = dfs.create(file); + for (int i = 0; i < 10; i++) { + byte[] data = RandomStringUtils.randomAlphabetic(102400).getBytes(); + stream.write(data); + } + stream.hflush(); + stream.close(); + Assert.assertTrue(SetSpanReceiver.SetHolder.size() == 0); + } + + @Test + public void testReadTraceHooks() throws Exception { + String fileName = "traceReadTest.dat"; + Path filePath = new Path(fileName); + + // Create the file. + FSDataOutputStream ostream = dfs.create(filePath); + for (int i = 0; i < 50; i++) { + byte[] data = RandomStringUtils.randomAlphabetic(10240).getBytes(); + ostream.write(data); + } + ostream.close(); + + + long startTime = System.currentTimeMillis(); + TraceScope ts = Trace.startSpan("testReadTraceHooks", Sampler.ALWAYS); + FSDataInputStream istream = dfs.open(filePath, 10240); + ByteBuffer buf = ByteBuffer.allocate(10240); + + int count = 0; + try { + while (istream.read(buf) > 0) { + count += 1; + buf.clear(); + istream.seek(istream.getPos() + 5); + } + } catch (IOException ioe) { + // Ignore this it's probably a seek after eof. + } finally { + istream.close(); + } + ts.getSpan().addTimelineAnnotation("count: " + count); + long endTime = System.currentTimeMillis(); + ts.close(); + + String[] expectedSpanNames = { + "testReadTraceHooks", + "org.apache.hadoop.hdfs.protocol.ClientProtocol.getBlockLocations", + "org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol.BlockingInterface.getBlockLocations" + }; + assertSpanNamesFound(expectedSpanNames); + + // The trace should last about the same amount of time as the test + Map> map = SetSpanReceiver.SetHolder.getMap(); + Span s = map.get("testReadTraceHooks").get(0); + Assert.assertNotNull(s); + + long spanStart = s.getStartTimeMillis(); + long spanEnd = s.getStopTimeMillis(); + Assert.assertTrue(spanStart - startTime < 100); + Assert.assertTrue(spanEnd - endTime < 100); + + // There should only be one trace id as it should all be homed in the + // top trace. 
+ for (Span span : SetSpanReceiver.SetHolder.spans) { + Assert.assertEquals(ts.getSpan().getTraceId(), span.getTraceId()); + } + } + + @Test + public void testReadWithoutTraceHooks() throws Exception { + String fileName = "withoutTraceReadTest.dat"; + Path filePath = new Path(fileName); + + // Create the file. + FSDataOutputStream ostream = dfs.create(filePath); + for (int i = 0; i < 50; i++) { + byte[] data = RandomStringUtils.randomAlphabetic(10240).getBytes(); + ostream.write(data); + } + ostream.close(); + + FSDataInputStream istream = dfs.open(filePath, 10240); + ByteBuffer buf = ByteBuffer.allocate(10240); + + int count = 0; + try { + while (istream.read(buf) > 0) { + count += 1; + buf.clear(); + istream.seek(istream.getPos() + 5); + } + } catch (IOException ioe) { + // Ignore this it's probably a seek after eof. + } finally { + istream.close(); + } + Assert.assertTrue(SetSpanReceiver.SetHolder.size() == 0); + } + + @Before + public void cleanSet() { + SetSpanReceiver.SetHolder.spans.clear(); + } + + @BeforeClass + public static void setupCluster() throws IOException { + conf = new Configuration(); + conf.setLong("dfs.blocksize", 100 * 1024); + conf.set(SpanReceiverHost.SPAN_RECEIVERS_CONF_KEY, + SetSpanReceiver.class.getName()); + + cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(3) + .build(); + + dfs = cluster.getFileSystem(); + } + + @AfterClass + public static void shutDown() throws IOException { + cluster.shutdown(); + } + + private void assertSpanNamesFound(String[] expectedSpanNames) { + Map> map = SetSpanReceiver.SetHolder.getMap(); + for (String spanName : expectedSpanNames) { + Assert.assertTrue("Should find a span with name " + spanName, map.get(spanName) != null); + } + } + + /** + * Span receiver that puts all spans into a single set. + * This is useful for testing. + *

+ * We're not using HTrace's POJOReceiver here because it doesn't + * push all the metrics to a static place, which would make testing + * SpanReceiverHost harder. + */ + public static class SetSpanReceiver implements SpanReceiver { + + public void configure(HTraceConfiguration conf) { + } + + public void receiveSpan(Span span) { + SetHolder.spans.add(span); + } + + public void close() { + } + + public static class SetHolder { + public static Set<Span> spans = new HashSet<Span>(); + + public static int size() { + return spans.size(); + } + + public static Map<String, List<Span>> getMap() { + Map<String, List<Span>> map = new HashMap<String, List<Span>>(); + + for (Span s : spans) { + List<Span> l = map.get(s.getDescription()); + if (l == null) { + l = new LinkedList<Span>(); + map.put(s.getDescription(), l); + } + l.add(s); + } + return map; + } + } + } +} diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index be5b3d51b5..beaeec63a7 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -677,6 +677,11 @@ jsch 0.1.42 + <dependency> + <groupId>org.htrace</groupId> + <artifactId>htrace-core</artifactId> + <version>3.0.4</version> + </dependency> org.jdom jdom diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index 56288ee60c..a42aff0a38 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -65,6 +65,7 @@ +
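For reference, here is a minimal standalone sketch (not part of the patch) of how the pieces added above fit together from an application's point of view: SpanReceiverHost reads the hadoop.trace.spanreceiver.classes key, registers the configured receiver with HTrace, and any span started around a FileSystem call is then carried across the instrumented IPC layer. The class name TracedMkdir and the paths are illustrative assumptions; the configuration keys and APIs are the ones shown in Tracing.apt.vm and SpanReceiverHost above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tracing.SpanReceiverHost;
import org.htrace.Sampler;
import org.htrace.Trace;
import org.htrace.TraceScope;

// Illustrative sketch only (hypothetical class name and paths); assumes the
// configuration keys documented in Tracing.apt.vm and the SpanReceiverHost
// API added by this patch.
public class TracedMkdir {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Same settings as the core-site.xml example above. The HTraceConfiguration
    // wrapper in SpanReceiverHost prepends "hadoop." when HTrace looks up a key,
    // so HTrace's "local-file-span-receiver.path" maps to the key set below.
    conf.set(SpanReceiverHost.SPAN_RECEIVERS_CONF_KEY,
        "org.htrace.impl.LocalFileSpanReceiver");
    conf.set("hadoop.local-file-span-receiver.path", "/tmp/htrace.out");
    SpanReceiverHost.getInstance(conf);  // register the receiver once per process

    // Wrap the traced logic in a span; Sampler.ALWAYS forces tracing on, and the
    // span is propagated to the NameNode via the RPCTraceInfoProto header field.
    TraceScope ts = Trace.startSpan("TracedMkdir", Sampler.ALWAYS);
    try {
      FileSystem.get(conf).mkdirs(new Path("/tmp/trace-demo"));
    } finally {
      ts.close();
    }
  }
}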