YARN-11188. Only files belong to the first file controller are removed even if multiple log aggregation file controllers are configured. Contributed by Szilard Nemeth.

This commit is contained in:
9uapaw 2022-06-22 14:40:00 +02:00
parent c9ddbd210c
commit e6ecc4f3e4
5 changed files with 141 additions and 33 deletions

View File

@ -19,6 +19,8 @@
package org.apache.hadoop.yarn.logaggregation; package org.apache.hadoop.yarn.logaggregation;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Timer; import java.util.Timer;
import java.util.TimerTask; import java.util.TimerTask;
@ -57,7 +59,7 @@ public class AggregatedLogDeletionService extends AbstractService {
private Timer timer = null; private Timer timer = null;
private long checkIntervalMsecs; private long checkIntervalMsecs;
private LogDeletionTask task; private List<LogDeletionTask> tasks;
public static class LogDeletionTask extends TimerTask { public static class LogDeletionTask extends TimerTask {
private Configuration conf; private Configuration conf;
@ -66,14 +68,12 @@ public static class LogDeletionTask extends TimerTask {
private Path remoteRootLogDir = null; private Path remoteRootLogDir = null;
private ApplicationClientProtocol rmClient = null; private ApplicationClientProtocol rmClient = null;
public LogDeletionTask(Configuration conf, long retentionSecs, ApplicationClientProtocol rmClient) { public LogDeletionTask(Configuration conf, long retentionSecs,
ApplicationClientProtocol rmClient,
LogAggregationFileController fileController) {
this.conf = conf; this.conf = conf;
this.retentionMillis = retentionSecs * 1000; this.retentionMillis = retentionSecs * 1000;
this.suffix = LogAggregationUtils.getBucketSuffix(); this.suffix = LogAggregationUtils.getBucketSuffix();
LogAggregationFileControllerFactory factory =
new LogAggregationFileControllerFactory(conf);
LogAggregationFileController fileController =
factory.getFileControllerForWrite();
this.remoteRootLogDir = fileController.getRemoteRootLogDir(); this.remoteRootLogDir = fileController.getRemoteRootLogDir();
this.rmClient = rmClient; this.rmClient = rmClient;
} }
@ -220,7 +220,7 @@ public AggregatedLogDeletionService() {
@Override @Override
protected void serviceStart() throws Exception { protected void serviceStart() throws Exception {
scheduleLogDeletionTask(); scheduleLogDeletionTasks();
super.serviceStart(); super.serviceStart();
} }
@ -249,13 +249,13 @@ public void refreshLogRetentionSettings() throws IOException {
setConfig(conf); setConfig(conf);
stopRMClient(); stopRMClient();
stopTimer(); stopTimer();
scheduleLogDeletionTask(); scheduleLogDeletionTasks();
} else { } else {
LOG.warn("Failed to execute refreshLogRetentionSettings : Aggregated Log Deletion Service is not started"); LOG.warn("Failed to execute refreshLogRetentionSettings : Aggregated Log Deletion Service is not started");
} }
} }
private void scheduleLogDeletionTask() throws IOException { private void scheduleLogDeletionTasks() throws IOException {
Configuration conf = getConfig(); Configuration conf = getConfig();
if (!conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, if (!conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED)) { YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED)) {
@ -271,9 +271,28 @@ private void scheduleLogDeletionTask() throws IOException {
return; return;
} }
setLogAggCheckIntervalMsecs(retentionSecs); setLogAggCheckIntervalMsecs(retentionSecs);
task = new LogDeletionTask(conf, retentionSecs, createRMClient());
timer = new Timer(); tasks = createLogDeletionTasks(conf, retentionSecs, createRMClient());
timer.scheduleAtFixedRate(task, 0, checkIntervalMsecs); for (LogDeletionTask task : tasks) {
timer = new Timer();
timer.scheduleAtFixedRate(task, 0, checkIntervalMsecs);
}
}
@VisibleForTesting
public List<LogDeletionTask> createLogDeletionTasks(Configuration conf, long retentionSecs,
ApplicationClientProtocol rmClient)
throws IOException {
List<LogDeletionTask> tasks = new ArrayList<>();
LogAggregationFileControllerFactory factory = new LogAggregationFileControllerFactory(conf);
List<LogAggregationFileController> fileControllers =
factory.getConfiguredLogAggregationFileControllerList();
for (LogAggregationFileController fileController : fileControllers) {
LogDeletionTask task = new LogDeletionTask(conf, retentionSecs, rmClient,
fileController);
tasks.add(task);
}
return tasks;
} }
private void stopTimer() { private void stopTimer() {
@ -295,14 +314,18 @@ protected Configuration createConf() {
// as @Idempotent, it will automatically take care of RM restart/failover. // as @Idempotent, it will automatically take care of RM restart/failover.
@VisibleForTesting @VisibleForTesting
protected ApplicationClientProtocol createRMClient() throws IOException { protected ApplicationClientProtocol createRMClient() throws IOException {
return ClientRMProxy.createRMProxy(getConfig(), return ClientRMProxy.createRMProxy(getConfig(), ApplicationClientProtocol.class);
ApplicationClientProtocol.class);
} }
@VisibleForTesting @VisibleForTesting
protected void stopRMClient() { protected void stopRMClient() {
if (task != null && task.getRMClient() != null) { for (LogDeletionTask task : tasks) {
RPC.stopProxy(task.getRMClient()); if (task != null && task.getRMClient() != null) {
RPC.stopProxy(task.getRMClient());
//The RMClient instance is the same for all deletion tasks.
//It is enough to close the RM client once
break;
}
} }
} }
} }

View File

@ -42,6 +42,7 @@
import java.util.List; import java.util.List;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.LOG_AGGREGATION_FILE_CONTROLLER_FMT; import static org.apache.hadoop.yarn.conf.YarnConfiguration.LOG_AGGREGATION_FILE_CONTROLLER_FMT;
import static org.apache.hadoop.yarn.logaggregation.LogAggregationTestUtils.enableFileControllers;
import static org.apache.hadoop.yarn.logaggregation.testutils.LogAggregationTestcaseBuilder.NO_TIMEOUT; import static org.apache.hadoop.yarn.logaggregation.testutils.LogAggregationTestcaseBuilder.NO_TIMEOUT;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
@ -118,12 +119,12 @@ public void testDeletion() throws Exception {
.withRunningApps(4) .withRunningApps(4)
.injectExceptionForAppDirDeletion(3) .injectExceptionForAppDirDeletion(3)
.build() .build()
.setupAndRunDeletionService() .startDeletionService()
.verifyAppDirsDeleted(timeout, 1, 3) .verifyAppDirsDeleted(timeout, 1, 3)
.verifyAppDirsNotDeleted(timeout, 2, 4) .verifyAppDirsNotDeleted(timeout, 2, 4)
.verifyAppFileDeleted(4, 1, timeout) .verifyAppFileDeleted(4, 1, timeout)
.verifyAppFileNotDeleted(4, 2, timeout) .verifyAppFileNotDeleted(4, 2, timeout)
.teardown(); .teardown(1);
} }
@Test @Test
@ -155,7 +156,7 @@ public void testRefreshLogRetentionSettings() throws Exception {
.build(); .build();
testcase testcase
.setupAndRunDeletionService() .startDeletionService()
//app1Dir would be deleted since it is done above log retention period //app1Dir would be deleted since it is done above log retention period
.verifyAppDirDeleted(1, 10000L) .verifyAppDirDeleted(1, 10000L)
//app2Dir is not expected to be deleted since it is below the threshold //app2Dir is not expected to be deleted since it is below the threshold
@ -176,7 +177,8 @@ public void testRefreshLogRetentionSettings() throws Exception {
.verifyCheckIntervalMilliSecondsEqualTo(checkIntervalMilliSeconds) .verifyCheckIntervalMilliSecondsEqualTo(checkIntervalMilliSeconds)
//app2Dir should be deleted since it falls above the threshold //app2Dir should be deleted since it falls above the threshold
.verifyAppDirDeleted(2, 10000L) .verifyAppDirDeleted(2, 10000L)
.teardown(); //Close expected 2 times: once for refresh and once for stopping
.teardown(2);
} }
@Test @Test
@ -202,7 +204,7 @@ public void testCheckInterval() throws Exception {
.withFinishedApps(1) .withFinishedApps(1)
.withRunningApps() .withRunningApps()
.build() .build()
.setupAndRunDeletionService() .startDeletionService()
.verifyAnyPathListedAtLeast(4, 10000L) .verifyAnyPathListedAtLeast(4, 10000L)
.verifyAppDirNotDeleted(1, NO_TIMEOUT) .verifyAppDirNotDeleted(1, NO_TIMEOUT)
// modify the timestamp of the logs and verify if it is picked up quickly // modify the timestamp of the logs and verify if it is picked up quickly
@ -211,7 +213,7 @@ public void testCheckInterval() throws Exception {
.changeModTimeOfBucketDir(toDeleteTime) .changeModTimeOfBucketDir(toDeleteTime)
.reinitAllPaths() .reinitAllPaths()
.verifyAppDirDeleted(1, 10000L) .verifyAppDirDeleted(1, 10000L)
.teardown(); .teardown(1);
} }
@Test @Test
@ -241,6 +243,59 @@ public void testRobustLogDeletion() throws Exception {
.verifyAppDirDeleted(3, NO_TIMEOUT); .verifyAppDirDeleted(3, NO_TIMEOUT);
} }
@Test
public void testDeletionTwoControllers() throws IOException {
long now = System.currentTimeMillis();
long toDeleteTime = now - (2000 * 1000);
long toKeepTime = now - (1500 * 1000);
Configuration conf = setupConfiguration(1800, -1);
enableFileControllers(conf, REMOTE_ROOT_LOG_DIR, ALL_FILE_CONTROLLERS,
ALL_FILE_CONTROLLER_NAMES);
long timeout = 2000L;
LogAggregationTestcaseBuilder.create(conf)
.withRootPath(ROOT)
.withRemoteRootLogPath(REMOTE_ROOT_LOG_DIR)
.withBothFileControllers()
.withUserDir(USER_ME, toKeepTime)
.withSuffixDir(SUFFIX, toDeleteTime)
.withBucketDir(toDeleteTime)
.withApps(//Apps for TFile
Lists.newArrayList(
new AppDescriptor(T_FILE, toDeleteTime, Lists.newArrayList()),
new AppDescriptor(T_FILE, toDeleteTime, Lists.newArrayList(
Pair.of(DIR_HOST1, toDeleteTime),
Pair.of(DIR_HOST2, toKeepTime))),
new AppDescriptor(T_FILE, toDeleteTime, Lists.newArrayList(
Pair.of(DIR_HOST1, toDeleteTime),
Pair.of(DIR_HOST2, toDeleteTime))),
new AppDescriptor(T_FILE, toDeleteTime, Lists.newArrayList(
Pair.of(DIR_HOST1, toDeleteTime),
Pair.of(DIR_HOST2, toKeepTime))),
//Apps for IFile
new AppDescriptor(I_FILE, toDeleteTime, Lists.newArrayList()),
new AppDescriptor(I_FILE, toDeleteTime, Lists.newArrayList(
Pair.of(DIR_HOST1, toDeleteTime),
Pair.of(DIR_HOST2, toKeepTime))),
new AppDescriptor(I_FILE, toDeleteTime, Lists.newArrayList(
Pair.of(DIR_HOST1, toDeleteTime),
Pair.of(DIR_HOST2, toDeleteTime))),
new AppDescriptor(I_FILE, toDeleteTime, Lists.newArrayList(
Pair.of(DIR_HOST1, toDeleteTime),
Pair.of(DIR_HOST2, toKeepTime)))))
.withFinishedApps(1, 2, 3, 5, 6, 7)
.withRunningApps(4, 8)
.injectExceptionForAppDirDeletion(3, 6)
.build()
.startDeletionService()
.verifyAppDirsDeleted(timeout, 1, 3, 5, 7)
.verifyAppDirsNotDeleted(timeout, 2, 4, 6, 8)
.verifyAppFilesDeleted(timeout, Lists.newArrayList(Pair.of(4, 1), Pair.of(8, 1)))
.verifyAppFilesNotDeleted(timeout, Lists.newArrayList(Pair.of(4, 2), Pair.of(8, 2)))
.teardown(1);
}
static class MockFileSystem extends FilterFileSystem { static class MockFileSystem extends FilterFileSystem {
MockFileSystem() { MockFileSystem() {
super(mock(FileSystem.class)); super(mock(FileSystem.class));

View File

@ -32,6 +32,7 @@ public class AggregatedLogDeletionServiceForTest extends AggregatedLogDeletionSe
private final List<ApplicationId> finishedApplications; private final List<ApplicationId> finishedApplications;
private final List<ApplicationId> runningApplications; private final List<ApplicationId> runningApplications;
private final Configuration conf; private final Configuration conf;
private ApplicationClientProtocol mockRMClient;
public AggregatedLogDeletionServiceForTest(List<ApplicationId> runningApplications, public AggregatedLogDeletionServiceForTest(List<ApplicationId> runningApplications,
List<ApplicationId> finishedApplications) { List<ApplicationId> finishedApplications) {
@ -48,11 +49,16 @@ public AggregatedLogDeletionServiceForTest(List<ApplicationId> runningApplicatio
@Override @Override
protected ApplicationClientProtocol createRMClient() throws IOException { protected ApplicationClientProtocol createRMClient() throws IOException {
if (mockRMClient != null) {
return mockRMClient;
}
try { try {
return createMockRMClient(finishedApplications, runningApplications); mockRMClient =
createMockRMClient(finishedApplications, runningApplications);
} catch (Exception e) { } catch (Exception e) {
throw new IOException(e); throw new IOException(e);
} }
return mockRMClient;
} }
@Override @Override
@ -60,8 +66,7 @@ protected Configuration createConf() {
return conf; return conf;
} }
@Override public ApplicationClientProtocol getMockRMClient() {
protected void stopRMClient() { return mockRMClient;
// DO NOTHING
} }
} }

View File

@ -28,10 +28,12 @@
import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogDeletionService; import org.apache.hadoop.yarn.logaggregation.AggregatedLogDeletionService;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogDeletionService.LogDeletionTask;
import org.apache.hadoop.yarn.logaggregation.testutils.LogAggregationTestcaseBuilder.AppDescriptor; import org.apache.hadoop.yarn.logaggregation.testutils.LogAggregationTestcaseBuilder.AppDescriptor;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet; import java.util.HashSet;
@ -77,6 +79,7 @@ public class LogAggregationTestcase {
private List<PathWithFileStatus> appDirs; private List<PathWithFileStatus> appDirs;
private final List<AppDescriptor> appDescriptors; private final List<AppDescriptor> appDescriptors;
private AggregatedLogDeletionServiceForTest deletionService; private AggregatedLogDeletionServiceForTest deletionService;
private ApplicationClientProtocol rmClient;
public LogAggregationTestcase(LogAggregationTestcaseBuilder builder) throws IOException { public LogAggregationTestcase(LogAggregationTestcaseBuilder builder) throws IOException {
conf = builder.conf; conf = builder.conf;
@ -102,6 +105,8 @@ public LogAggregationTestcase(LogAggregationTestcaseBuilder builder) throws IOEx
mockFs = ((FilterFileSystem) builder.rootFs).getRawFileSystem(); mockFs = ((FilterFileSystem) builder.rootFs).getRawFileSystem();
validateAppControllers(); validateAppControllers();
setupMocks(); setupMocks();
setupDeletionService();
} }
private void validateAppControllers() { private void validateAppControllers() {
@ -241,10 +246,13 @@ private void setupListStatusForPath(PathWithFileStatus dir, FileStatus[] fileSta
when(mockFs.listStatus(dir.path)).thenReturn(fileStatuses); when(mockFs.listStatus(dir.path)).thenReturn(fileStatuses);
} }
public LogAggregationTestcase setupAndRunDeletionService() { private void setupDeletionService() {
List<ApplicationId> finishedApps = createFinishedAppsList(); List<ApplicationId> finishedApps = createFinishedAppsList();
List<ApplicationId> runningApps = createRunningAppsList(); List<ApplicationId> runningApps = createRunningAppsList();
deletionService = new AggregatedLogDeletionServiceForTest(runningApps, finishedApps, conf); deletionService = new AggregatedLogDeletionServiceForTest(runningApps, finishedApps, conf);
}
public LogAggregationTestcase startDeletionService() {
deletionService.init(conf); deletionService.init(conf);
deletionService.start(); deletionService.start();
return this; return this;
@ -271,10 +279,13 @@ private List<ApplicationId> createFinishedAppsList() {
public LogAggregationTestcase runDeletionTask(long retentionSeconds) throws Exception { public LogAggregationTestcase runDeletionTask(long retentionSeconds) throws Exception {
List<ApplicationId> finishedApps = createFinishedAppsList(); List<ApplicationId> finishedApps = createFinishedAppsList();
List<ApplicationId> runningApps = createRunningAppsList(); List<ApplicationId> runningApps = createRunningAppsList();
ApplicationClientProtocol rmClient = createMockRMClient(finishedApps, runningApps); rmClient = createMockRMClient(finishedApps, runningApps);
AggregatedLogDeletionService.LogDeletionTask deletionTask = List<LogDeletionTask> tasks = deletionService.createLogDeletionTasks(conf, retentionSeconds,
new AggregatedLogDeletionService.LogDeletionTask(conf, retentionSeconds, rmClient); rmClient);
deletionTask.run(); for (LogDeletionTask deletionTask : tasks) {
deletionTask.run();
}
return this; return this;
} }
@ -359,8 +370,20 @@ private void verifyAppFileDeletion(int appId, int fileNo, int times, long timeou
verify(mockFs, timeout(timeout).times(times)).delete(file.path, true); verify(mockFs, timeout(timeout).times(times)).delete(file.path, true);
} }
public void teardown() { private void verifyMockRmClientWasClosedNTimes(int expectedRmClientCloses)
throws IOException {
ApplicationClientProtocol mockRMClient;
if (deletionService != null) {
mockRMClient = deletionService.getMockRMClient();
} else {
mockRMClient = rmClient;
}
verify((Closeable)mockRMClient, times(expectedRmClientCloses)).close();
}
public void teardown(int expectedRmClientCloses) throws IOException {
deletionService.stop(); deletionService.stop();
verifyMockRmClientWasClosedNTimes(expectedRmClientCloses);
} }
public LogAggregationTestcase refreshLogRetentionSettings() throws IOException { public LogAggregationTestcase refreshLogRetentionSettings() throws IOException {

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.logaggregation.testutils; package org.apache.hadoop.yarn.logaggregation.testutils;
import org.apache.hadoop.test.MockitoUtil;
import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
@ -34,7 +35,8 @@ public class MockRMClientUtils {
public static ApplicationClientProtocol createMockRMClient( public static ApplicationClientProtocol createMockRMClient(
List<ApplicationId> finishedApplications, List<ApplicationId> finishedApplications,
List<ApplicationId> runningApplications) throws Exception { List<ApplicationId> runningApplications) throws Exception {
final ApplicationClientProtocol mockProtocol = mock(ApplicationClientProtocol.class); final ApplicationClientProtocol mockProtocol =
MockitoUtil.mockProtocol(ApplicationClientProtocol.class);
if (finishedApplications != null && !finishedApplications.isEmpty()) { if (finishedApplications != null && !finishedApplications.isEmpty()) {
for (ApplicationId appId : finishedApplications) { for (ApplicationId appId : finishedApplications) {
GetApplicationReportRequest request = GetApplicationReportRequest.newInstance(appId); GetApplicationReportRequest request = GetApplicationReportRequest.newInstance(appId);