From 86e3993def01223f92b8d1dd35f6c1f8ab6033f5 Mon Sep 17 00:00:00 2001 From: Colin Patrick Mccabe Date: Mon, 1 Dec 2014 11:42:10 -0800 Subject: [PATCH] HADOOP-11333. Fix deadlock in DomainSocketWatcher when the notification pipe is full (zhaoyunjiong via cmccabe) --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../hadoop/net/unix/DomainSocketWatcher.java | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 3fe9219230..59ccba06b7 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -473,6 +473,9 @@ Release 2.7.0 - UNRELEASED HADOOP-11300. KMS startup scripts must not display the keystore / truststore passwords. (Arun Suresh via wang) + HADOOP-11333. Fix deadlock in DomainSocketWatcher when the notification + pipe is full (zhaoyunjiong via cmccabe) + Release 2.6.0 - 2014-11-18 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java index 95ef30d2ea..0172f6bfae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java @@ -103,6 +103,7 @@ private class NotificationHandler implements Handler { public boolean handle(DomainSocket sock) { assert(lock.isHeldByCurrentThread()); try { + kicked = false; if (LOG.isTraceEnabled()) { LOG.trace(this + ": NotificationHandler: doing a read on " + sock.fd); @@ -228,6 +229,14 @@ private static class FdSet { * Whether or not this DomainSocketWatcher is closed. */ private boolean closed = false; + + /** + * True if we have written a byte to the notification socket. We should not + * write anything else to the socket until the notification handler has had a + * chance to run. Otherwise, our thread might block, causing deadlock. + * See HADOOP-11333 for details. + */ + private boolean kicked = false; public DomainSocketWatcher(int interruptCheckPeriodMs) throws IOException { if (loadingFailureReason != null) { @@ -348,8 +357,14 @@ public void remove(DomainSocket sock) { */ private void kick() { assert(lock.isHeldByCurrentThread()); + + if (kicked) { + return; + } + try { notificationSockets[0].getOutputStream().write(0); + kicked = true; } catch (IOException e) { if (!closed) { LOG.error(this + ": error writing to notificationSockets[0]", e);