HADOOP-8755. Print thread dump when tests fail due to timeout. Contributed by Andrey Klochkov.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384627 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
86635168fe
commit
d8e386f01a
@ -226,6 +226,9 @@ Release 2.0.3-alpha - Unreleased
|
|||||||
HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR.
|
HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR.
|
||||||
(Andy Isaacson via eli)
|
(Andy Isaacson via eli)
|
||||||
|
|
||||||
|
HADOOP-8755. Print thread dump when tests fail due to timeout. (Andrey
|
||||||
|
Klochkov via atm)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
@ -0,0 +1,181 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.test;
|
||||||
|
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.StringWriter;
|
||||||
|
import java.util.concurrent.BrokenBarrierException;
|
||||||
|
import java.util.concurrent.CyclicBarrier;
|
||||||
|
import java.util.concurrent.locks.Lock;
|
||||||
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.runner.notification.Failure;
|
||||||
|
|
||||||
|
public class TestTimedOutTestsListener {
|
||||||
|
|
||||||
|
public static class Deadlock {
|
||||||
|
private CyclicBarrier barrier = new CyclicBarrier(6);
|
||||||
|
|
||||||
|
public Deadlock() {
|
||||||
|
DeadlockThread[] dThreads = new DeadlockThread[6];
|
||||||
|
|
||||||
|
Monitor a = new Monitor("a");
|
||||||
|
Monitor b = new Monitor("b");
|
||||||
|
Monitor c = new Monitor("c");
|
||||||
|
dThreads[0] = new DeadlockThread("MThread-1", a, b);
|
||||||
|
dThreads[1] = new DeadlockThread("MThread-2", b, c);
|
||||||
|
dThreads[2] = new DeadlockThread("MThread-3", c, a);
|
||||||
|
|
||||||
|
Lock d = new ReentrantLock();
|
||||||
|
Lock e = new ReentrantLock();
|
||||||
|
Lock f = new ReentrantLock();
|
||||||
|
|
||||||
|
dThreads[3] = new DeadlockThread("SThread-4", d, e);
|
||||||
|
dThreads[4] = new DeadlockThread("SThread-5", e, f);
|
||||||
|
dThreads[5] = new DeadlockThread("SThread-6", f, d);
|
||||||
|
|
||||||
|
// make them daemon threads so that the test will exit
|
||||||
|
for (int i = 0; i < 6; i++) {
|
||||||
|
dThreads[i].setDaemon(true);
|
||||||
|
dThreads[i].start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class DeadlockThread extends Thread {
|
||||||
|
private Lock lock1 = null;
|
||||||
|
|
||||||
|
private Lock lock2 = null;
|
||||||
|
|
||||||
|
private Monitor mon1 = null;
|
||||||
|
|
||||||
|
private Monitor mon2 = null;
|
||||||
|
|
||||||
|
private boolean useSync;
|
||||||
|
|
||||||
|
DeadlockThread(String name, Lock lock1, Lock lock2) {
|
||||||
|
super(name);
|
||||||
|
this.lock1 = lock1;
|
||||||
|
this.lock2 = lock2;
|
||||||
|
this.useSync = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
DeadlockThread(String name, Monitor mon1, Monitor mon2) {
|
||||||
|
super(name);
|
||||||
|
this.mon1 = mon1;
|
||||||
|
this.mon2 = mon2;
|
||||||
|
this.useSync = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
if (useSync) {
|
||||||
|
syncLock();
|
||||||
|
} else {
|
||||||
|
monitorLock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void syncLock() {
|
||||||
|
lock1.lock();
|
||||||
|
try {
|
||||||
|
try {
|
||||||
|
barrier.await();
|
||||||
|
} catch (Exception e) {
|
||||||
|
}
|
||||||
|
goSyncDeadlock();
|
||||||
|
} finally {
|
||||||
|
lock1.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void goSyncDeadlock() {
|
||||||
|
try {
|
||||||
|
barrier.await();
|
||||||
|
} catch (Exception e) {
|
||||||
|
}
|
||||||
|
lock2.lock();
|
||||||
|
throw new RuntimeException("should not reach here.");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void monitorLock() {
|
||||||
|
synchronized (mon1) {
|
||||||
|
try {
|
||||||
|
barrier.await();
|
||||||
|
} catch (Exception e) {
|
||||||
|
}
|
||||||
|
goMonitorDeadlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void goMonitorDeadlock() {
|
||||||
|
try {
|
||||||
|
barrier.await();
|
||||||
|
} catch (Exception e) {
|
||||||
|
}
|
||||||
|
synchronized (mon2) {
|
||||||
|
throw new RuntimeException(getName() + " should not reach here.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class Monitor {
|
||||||
|
String name;
|
||||||
|
|
||||||
|
Monitor(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=500)
|
||||||
|
public void testThreadDumpAndDeadlocks() throws Exception {
|
||||||
|
new Deadlock();
|
||||||
|
String s = null;
|
||||||
|
while (true) {
|
||||||
|
s = TimedOutTestsListener.buildDeadlockInfo();
|
||||||
|
if (s != null)
|
||||||
|
break;
|
||||||
|
Thread.sleep(100);
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert.assertEquals(3, countStringOccurrences(s, "BLOCKED"));
|
||||||
|
|
||||||
|
Failure failure = new Failure(
|
||||||
|
null, new Exception(TimedOutTestsListener.TEST_TIMED_OUT_PREFIX));
|
||||||
|
StringWriter writer = new StringWriter();
|
||||||
|
new TimedOutTestsListener(new PrintWriter(writer)).testFailure(failure);
|
||||||
|
String out = writer.toString();
|
||||||
|
|
||||||
|
Assert.assertTrue(out.contains("THREAD DUMP"));
|
||||||
|
Assert.assertTrue(out.contains("DEADLOCKS DETECTED"));
|
||||||
|
|
||||||
|
System.out.println(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int countStringOccurrences(String s, String substr) {
|
||||||
|
int n = 0;
|
||||||
|
int index = 0;
|
||||||
|
while ((index = s.indexOf(substr, index) + 1) != 0) {
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,166 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.test;
|
||||||
|
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.StringWriter;
|
||||||
|
import java.lang.management.LockInfo;
|
||||||
|
import java.lang.management.ManagementFactory;
|
||||||
|
import java.lang.management.MonitorInfo;
|
||||||
|
import java.lang.management.ThreadInfo;
|
||||||
|
import java.lang.management.ThreadMXBean;
|
||||||
|
import java.text.DateFormat;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.junit.runner.notification.Failure;
|
||||||
|
import org.junit.runner.notification.RunListener;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* JUnit run listener which prints full thread dump into System.err
|
||||||
|
* in case a test is failed due to timeout.
|
||||||
|
*/
|
||||||
|
public class TimedOutTestsListener extends RunListener {
|
||||||
|
|
||||||
|
static final String TEST_TIMED_OUT_PREFIX = "test timed out after";
|
||||||
|
|
||||||
|
private static String INDENT = " ";
|
||||||
|
|
||||||
|
private final PrintWriter output;
|
||||||
|
|
||||||
|
public TimedOutTestsListener() {
|
||||||
|
this.output = new PrintWriter(System.err);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TimedOutTestsListener(PrintWriter output) {
|
||||||
|
this.output = output;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void testFailure(Failure failure) throws Exception {
|
||||||
|
if (failure != null && failure.getMessage() != null
|
||||||
|
&& failure.getMessage().startsWith(TEST_TIMED_OUT_PREFIX)) {
|
||||||
|
output.println("====> TEST TIMED OUT. PRINTING THREAD DUMP. <====");
|
||||||
|
output.println();
|
||||||
|
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
|
||||||
|
output.println(String.format("Timestamp: %s", dateFormat.format(new Date())));
|
||||||
|
output.println();
|
||||||
|
output.println(buildThreadDump());
|
||||||
|
|
||||||
|
String deadlocksInfo = buildDeadlockInfo();
|
||||||
|
if (deadlocksInfo != null) {
|
||||||
|
output.println("====> DEADLOCKS DETECTED <====");
|
||||||
|
output.println();
|
||||||
|
output.println(deadlocksInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static String buildThreadDump() {
|
||||||
|
StringBuilder dump = new StringBuilder();
|
||||||
|
Map<Thread, StackTraceElement[]> stackTraces = Thread.getAllStackTraces();
|
||||||
|
for (Map.Entry<Thread, StackTraceElement[]> e : stackTraces.entrySet()) {
|
||||||
|
Thread thread = e.getKey();
|
||||||
|
dump.append(String.format(
|
||||||
|
"\"%s\" %s prio=%d tid=%d %s\njava.lang.Thread.State: %s",
|
||||||
|
thread.getName(),
|
||||||
|
(thread.isDaemon() ? "daemon" : ""),
|
||||||
|
thread.getPriority(),
|
||||||
|
thread.getId(),
|
||||||
|
Thread.State.WAITING.equals(thread.getState()) ?
|
||||||
|
"in Object.wait()" : thread.getState().name().toLowerCase(),
|
||||||
|
Thread.State.WAITING.equals(thread.getState()) ?
|
||||||
|
"WAITING (on object monitor)" : thread.getState()));
|
||||||
|
for (StackTraceElement stackTraceElement : e.getValue()) {
|
||||||
|
dump.append("\n at ");
|
||||||
|
dump.append(stackTraceElement);
|
||||||
|
}
|
||||||
|
dump.append("\n");
|
||||||
|
}
|
||||||
|
return dump.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
static String buildDeadlockInfo() {
|
||||||
|
ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
|
||||||
|
long[] threadIds = threadBean.findMonitorDeadlockedThreads();
|
||||||
|
if (threadIds != null && threadIds.length > 0) {
|
||||||
|
StringWriter stringWriter = new StringWriter();
|
||||||
|
PrintWriter out = new PrintWriter(stringWriter);
|
||||||
|
|
||||||
|
ThreadInfo[] infos = threadBean.getThreadInfo(threadIds, true, true);
|
||||||
|
for (ThreadInfo ti : infos) {
|
||||||
|
printThreadInfo(ti, out);
|
||||||
|
printLockInfo(ti.getLockedSynchronizers(), out);
|
||||||
|
out.println();
|
||||||
|
}
|
||||||
|
|
||||||
|
out.close();
|
||||||
|
return stringWriter.toString();
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void printThreadInfo(ThreadInfo ti, PrintWriter out) {
|
||||||
|
// print thread information
|
||||||
|
printThread(ti, out);
|
||||||
|
|
||||||
|
// print stack trace with locks
|
||||||
|
StackTraceElement[] stacktrace = ti.getStackTrace();
|
||||||
|
MonitorInfo[] monitors = ti.getLockedMonitors();
|
||||||
|
for (int i = 0; i < stacktrace.length; i++) {
|
||||||
|
StackTraceElement ste = stacktrace[i];
|
||||||
|
out.println(INDENT + "at " + ste.toString());
|
||||||
|
for (MonitorInfo mi : monitors) {
|
||||||
|
if (mi.getLockedStackDepth() == i) {
|
||||||
|
out.println(INDENT + " - locked " + mi);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.println();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void printThread(ThreadInfo ti, PrintWriter out) {
|
||||||
|
out.print("\"" + ti.getThreadName() + "\"" + " Id="
|
||||||
|
+ ti.getThreadId() + " in " + ti.getThreadState());
|
||||||
|
if (ti.getLockName() != null) {
|
||||||
|
out.print(" on lock=" + ti.getLockName());
|
||||||
|
}
|
||||||
|
if (ti.isSuspended()) {
|
||||||
|
out.print(" (suspended)");
|
||||||
|
}
|
||||||
|
if (ti.isInNative()) {
|
||||||
|
out.print(" (running in native)");
|
||||||
|
}
|
||||||
|
out.println();
|
||||||
|
if (ti.getLockOwnerName() != null) {
|
||||||
|
out.println(INDENT + " owned by " + ti.getLockOwnerName() + " Id="
|
||||||
|
+ ti.getLockOwnerId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void printLockInfo(LockInfo[] locks, PrintWriter out) {
|
||||||
|
out.println(INDENT + "Locked synchronizers: count = " + locks.length);
|
||||||
|
for (LockInfo li : locks) {
|
||||||
|
out.println(INDENT + " - " + li);
|
||||||
|
}
|
||||||
|
out.println();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -301,6 +301,12 @@
|
|||||||
<java.security.krb5.conf>${project.build.directory}/test-classes/krb5.conf</java.security.krb5.conf>
|
<java.security.krb5.conf>${project.build.directory}/test-classes/krb5.conf</java.security.krb5.conf>
|
||||||
<kerberos.realm>${kerberos.realm}</kerberos.realm>
|
<kerberos.realm>${kerberos.realm}</kerberos.realm>
|
||||||
</systemPropertyVariables>
|
</systemPropertyVariables>
|
||||||
|
<properties>
|
||||||
|
<property>
|
||||||
|
<name>listener</name>
|
||||||
|
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||||
|
</property>
|
||||||
|
</properties>
|
||||||
<excludes>
|
<excludes>
|
||||||
<exclude>**/${test.exclude}.java</exclude>
|
<exclude>**/${test.exclude}.java</exclude>
|
||||||
<exclude>${test.exclude.pattern}</exclude>
|
<exclude>${test.exclude.pattern}</exclude>
|
||||||
|
@ -189,6 +189,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|||||||
<startKdc>${startKdc}</startKdc>
|
<startKdc>${startKdc}</startKdc>
|
||||||
<kdc.resource.dir>${kdc.resource.dir}</kdc.resource.dir>
|
<kdc.resource.dir>${kdc.resource.dir}</kdc.resource.dir>
|
||||||
</systemPropertyVariables>
|
</systemPropertyVariables>
|
||||||
|
<properties>
|
||||||
|
<property>
|
||||||
|
<name>listener</name>
|
||||||
|
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||||
|
</property>
|
||||||
|
</properties>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
|
@ -172,6 +172,18 @@
|
|||||||
<effort>Max</effort>
|
<effort>Max</effort>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<properties>
|
||||||
|
<property>
|
||||||
|
<name>listener</name>
|
||||||
|
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||||
|
</property>
|
||||||
|
</properties>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
|
|
||||||
|
@ -220,6 +220,18 @@
|
|||||||
</includes>
|
</includes>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<properties>
|
||||||
|
<property>
|
||||||
|
<name>listener</name>
|
||||||
|
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||||
|
</property>
|
||||||
|
</properties>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
|
|
||||||
|
@ -154,6 +154,18 @@
|
|||||||
</includes>
|
</includes>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<properties>
|
||||||
|
<property>
|
||||||
|
<name>listener</name>
|
||||||
|
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||||
|
</property>
|
||||||
|
</properties>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user