YARN-10739. GenericEventHandler.printEventQueueDetails causes RM recovery to take too much time. Contributed by Qi Zhu.

This commit is contained in:
Peter Bacsko 2021-04-27 14:01:53 +02:00
parent f54e7646cf
commit a967ab06f2
2 changed files with 17 additions and 3 deletions

View File

@ -25,7 +25,11 @@
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.hadoop.yarn.metrics.EventTypeMetrics;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.MonotonicClock;
@ -93,6 +97,8 @@ public class AsyncDispatcher extends AbstractService implements Dispatcher {
private Clock clock = new MonotonicClock();
private ThreadPoolExecutor printEventDetailsExecutor;
/**
* The thread name for dispatcher.
*/
@ -179,6 +185,15 @@ protected void serviceInit(Configuration conf) throws Exception{
YARN_DISPATCHER_PRINT_EVENTS_INFO_THRESHOLD,
YarnConfiguration.
DEFAULT_YARN_DISPATCHER_PRINT_EVENTS_INFO_THRESHOLD);
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setNameFormat("PrintEventDetailsThread #%d")
.build();
// Thread pool for async print event details,
// to prevent wasting too much time for RM.
printEventDetailsExecutor = new ThreadPoolExecutor(
1, 5, 10, TimeUnit.SECONDS,
new LinkedBlockingQueue<>(), threadFactory);
}
@Override
@ -222,6 +237,7 @@ protected void serviceStop() throws Exception {
LOG.warn("Interrupted Exception while stopping", ie);
}
}
printEventDetailsExecutor.shutdownNow();
// stop all the components
super.serviceStop();
@ -319,7 +335,7 @@ public void handle(Event event) {
if (qSize != 0 && qSize % detailsInterval == 0
&& lastEventDetailsQueueSizeLogged != qSize) {
lastEventDetailsQueueSizeLogged = qSize;
printEventQueueDetails();
printEventDetailsExecutor.submit(this::printEventQueueDetails);
printTrigger = true;
}
int remCapacity = eventQueue.remainingCapacity();

View File

@ -187,8 +187,6 @@ public void testPrintDispatcherEventDetails() throws Exception {
when(event.getType()).thenReturn(TestEnum.TestEventType);
dispatcher.getEventHandler().handle(event);
}
verify(log, atLeastOnce()).info("Event type: TestEventType, " +
"Event record counter: 5000");
Thread.sleep(2000);
//Make sure more than one event to take
verify(log, atLeastOnce()).