From d8e386f01aabfabd1b8d1ebe8436b0e4b1860bb9 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 14 Sep 2012 01:44:23 +0000 Subject: [PATCH] HADOOP-8755. Print thread dump when tests fail due to timeout. Contributed by Andrey Klochkov. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384627 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../test/TestTimedOutTestsListener.java | 181 ++++++++++++++++++ .../hadoop/test/TimedOutTestsListener.java | 166 ++++++++++++++++ .../hadoop-hdfs-httpfs/pom.xml | 6 + hadoop-hdfs-project/hadoop-hdfs/pom.xml | 6 + .../hadoop-mapreduce-client/pom.xml | 12 ++ hadoop-mapreduce-project/pom.xml | 12 ++ hadoop-yarn-project/hadoop-yarn/pom.xml | 14 +- 8 files changed, 399 insertions(+), 1 deletion(-) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestTimedOutTestsListener.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TimedOutTestsListener.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index c33fed5e40..6eb7d6c34d 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -226,6 +226,9 @@ Release 2.0.3-alpha - Unreleased HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR. (Andy Isaacson via eli) + HADOOP-8755. Print thread dump when tests fail due to timeout. (Andrey + Klochkov via atm) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestTimedOutTestsListener.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestTimedOutTestsListener.java new file mode 100644 index 0000000000..62748b4da6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestTimedOutTestsListener.java @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.test; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.notification.Failure; + +public class TestTimedOutTestsListener { + + public static class Deadlock { + private CyclicBarrier barrier = new CyclicBarrier(6); + + public Deadlock() { + DeadlockThread[] dThreads = new DeadlockThread[6]; + + Monitor a = new Monitor("a"); + Monitor b = new Monitor("b"); + Monitor c = new Monitor("c"); + dThreads[0] = new DeadlockThread("MThread-1", a, b); + dThreads[1] = new DeadlockThread("MThread-2", b, c); + dThreads[2] = new DeadlockThread("MThread-3", c, a); + + Lock d = new ReentrantLock(); + Lock e = new ReentrantLock(); + Lock f = new ReentrantLock(); + + dThreads[3] = new DeadlockThread("SThread-4", d, e); + dThreads[4] = new DeadlockThread("SThread-5", e, f); + dThreads[5] = new DeadlockThread("SThread-6", f, d); + + // make them daemon threads so that the test will exit + for (int i = 0; i < 6; i++) { + dThreads[i].setDaemon(true); + dThreads[i].start(); + } + } + + class DeadlockThread extends Thread { + private Lock lock1 = null; + + private Lock lock2 = null; + + private Monitor mon1 = null; + + private Monitor mon2 = null; + + private boolean useSync; + + DeadlockThread(String name, Lock lock1, Lock lock2) { + super(name); + this.lock1 = lock1; + this.lock2 = lock2; + this.useSync = true; + } + + DeadlockThread(String name, Monitor mon1, Monitor mon2) { + super(name); + this.mon1 = mon1; + this.mon2 = mon2; + this.useSync = false; + } + + public void run() { + if (useSync) { + syncLock(); + } else { + monitorLock(); + } + } + + private void syncLock() { + lock1.lock(); + try { + try { + barrier.await(); + } catch (Exception e) { + } + goSyncDeadlock(); + } finally { + lock1.unlock(); + } + } + + private void goSyncDeadlock() { + try { + barrier.await(); + } catch (Exception e) { + } + lock2.lock(); + throw new RuntimeException("should not reach here."); + } + + private void monitorLock() { + synchronized (mon1) { + try { + barrier.await(); + } catch (Exception e) { + } + goMonitorDeadlock(); + } + } + + private void goMonitorDeadlock() { + try { + barrier.await(); + } catch (Exception e) { + } + synchronized (mon2) { + throw new RuntimeException(getName() + " should not reach here."); + } + } + } + + class Monitor { + String name; + + Monitor(String name) { + this.name = name; + } + } + + } + + @Test(timeout=500) + public void testThreadDumpAndDeadlocks() throws Exception { + new Deadlock(); + String s = null; + while (true) { + s = TimedOutTestsListener.buildDeadlockInfo(); + if (s != null) + break; + Thread.sleep(100); + } + + Assert.assertEquals(3, countStringOccurrences(s, "BLOCKED")); + + Failure failure = new Failure( + null, new Exception(TimedOutTestsListener.TEST_TIMED_OUT_PREFIX)); + StringWriter writer = new StringWriter(); + new TimedOutTestsListener(new PrintWriter(writer)).testFailure(failure); + String out = writer.toString(); + + Assert.assertTrue(out.contains("THREAD DUMP")); + Assert.assertTrue(out.contains("DEADLOCKS DETECTED")); + + System.out.println(out); + } + + private int countStringOccurrences(String s, String substr) { + int n = 0; + int index = 0; + while ((index = s.indexOf(substr, index) + 1) != 0) { + n++; + } + return n; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TimedOutTestsListener.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TimedOutTestsListener.java new file mode 100644 index 0000000000..a10edbbd27 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TimedOutTestsListener.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.test; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.LockInfo; +import java.lang.management.ManagementFactory; +import java.lang.management.MonitorInfo; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Map; + +import org.junit.runner.notification.Failure; +import org.junit.runner.notification.RunListener; + +/** + * JUnit run listener which prints full thread dump into System.err + * in case a test is failed due to timeout. + */ +public class TimedOutTestsListener extends RunListener { + + static final String TEST_TIMED_OUT_PREFIX = "test timed out after"; + + private static String INDENT = " "; + + private final PrintWriter output; + + public TimedOutTestsListener() { + this.output = new PrintWriter(System.err); + } + + public TimedOutTestsListener(PrintWriter output) { + this.output = output; + } + + @Override + public void testFailure(Failure failure) throws Exception { + if (failure != null && failure.getMessage() != null + && failure.getMessage().startsWith(TEST_TIMED_OUT_PREFIX)) { + output.println("====> TEST TIMED OUT. PRINTING THREAD DUMP. <===="); + output.println(); + DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); + output.println(String.format("Timestamp: %s", dateFormat.format(new Date()))); + output.println(); + output.println(buildThreadDump()); + + String deadlocksInfo = buildDeadlockInfo(); + if (deadlocksInfo != null) { + output.println("====> DEADLOCKS DETECTED <===="); + output.println(); + output.println(deadlocksInfo); + } + } + } + + static String buildThreadDump() { + StringBuilder dump = new StringBuilder(); + Map stackTraces = Thread.getAllStackTraces(); + for (Map.Entry e : stackTraces.entrySet()) { + Thread thread = e.getKey(); + dump.append(String.format( + "\"%s\" %s prio=%d tid=%d %s\njava.lang.Thread.State: %s", + thread.getName(), + (thread.isDaemon() ? "daemon" : ""), + thread.getPriority(), + thread.getId(), + Thread.State.WAITING.equals(thread.getState()) ? + "in Object.wait()" : thread.getState().name().toLowerCase(), + Thread.State.WAITING.equals(thread.getState()) ? + "WAITING (on object monitor)" : thread.getState())); + for (StackTraceElement stackTraceElement : e.getValue()) { + dump.append("\n at "); + dump.append(stackTraceElement); + } + dump.append("\n"); + } + return dump.toString(); + } + + static String buildDeadlockInfo() { + ThreadMXBean threadBean = ManagementFactory.getThreadMXBean(); + long[] threadIds = threadBean.findMonitorDeadlockedThreads(); + if (threadIds != null && threadIds.length > 0) { + StringWriter stringWriter = new StringWriter(); + PrintWriter out = new PrintWriter(stringWriter); + + ThreadInfo[] infos = threadBean.getThreadInfo(threadIds, true, true); + for (ThreadInfo ti : infos) { + printThreadInfo(ti, out); + printLockInfo(ti.getLockedSynchronizers(), out); + out.println(); + } + + out.close(); + return stringWriter.toString(); + } else { + return null; + } + } + + private static void printThreadInfo(ThreadInfo ti, PrintWriter out) { + // print thread information + printThread(ti, out); + + // print stack trace with locks + StackTraceElement[] stacktrace = ti.getStackTrace(); + MonitorInfo[] monitors = ti.getLockedMonitors(); + for (int i = 0; i < stacktrace.length; i++) { + StackTraceElement ste = stacktrace[i]; + out.println(INDENT + "at " + ste.toString()); + for (MonitorInfo mi : monitors) { + if (mi.getLockedStackDepth() == i) { + out.println(INDENT + " - locked " + mi); + } + } + } + out.println(); + } + + private static void printThread(ThreadInfo ti, PrintWriter out) { + out.print("\"" + ti.getThreadName() + "\"" + " Id=" + + ti.getThreadId() + " in " + ti.getThreadState()); + if (ti.getLockName() != null) { + out.print(" on lock=" + ti.getLockName()); + } + if (ti.isSuspended()) { + out.print(" (suspended)"); + } + if (ti.isInNative()) { + out.print(" (running in native)"); + } + out.println(); + if (ti.getLockOwnerName() != null) { + out.println(INDENT + " owned by " + ti.getLockOwnerName() + " Id=" + + ti.getLockOwnerId()); + } + } + + private static void printLockInfo(LockInfo[] locks, PrintWriter out) { + out.println(INDENT + "Locked synchronizers: count = " + locks.length); + for (LockInfo li : locks) { + out.println(INDENT + " - " + li); + } + out.println(); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml index 004ac20821..c5fd8f05f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml @@ -301,6 +301,12 @@ ${project.build.directory}/test-classes/krb5.conf ${kerberos.realm} + + + listener + org.apache.hadoop.test.TimedOutTestsListener + + **/${test.exclude}.java ${test.exclude.pattern} diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 55eaa2becc..df7405cada 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -189,6 +189,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> ${startKdc} ${kdc.resource.dir} + + + listener + org.apache.hadoop.test.TimedOutTestsListener + + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml index 9e6a03e9d0..dacf995b4d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml @@ -172,6 +172,18 @@ Max + + org.apache.maven.plugins + maven-surefire-plugin + + + + listener + org.apache.hadoop.test.TimedOutTestsListener + + + + diff --git a/hadoop-mapreduce-project/pom.xml b/hadoop-mapreduce-project/pom.xml index 225a582dbe..eb2dc22ca1 100644 --- a/hadoop-mapreduce-project/pom.xml +++ b/hadoop-mapreduce-project/pom.xml @@ -220,6 +220,18 @@ + + org.apache.maven.plugins + maven-surefire-plugin + + + + listener + org.apache.hadoop.test.TimedOutTestsListener + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/pom.xml b/hadoop-yarn-project/hadoop-yarn/pom.xml index 718c5ffea8..559be4f4df 100644 --- a/hadoop-yarn-project/hadoop-yarn/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/pom.xml @@ -154,6 +154,18 @@ + + org.apache.maven.plugins + maven-surefire-plugin + + + + listener + org.apache.hadoop.test.TimedOutTestsListener + + + + @@ -187,4 +199,4 @@ hadoop-yarn-site hadoop-yarn-client - \ No newline at end of file +