From 73e626ad91cd5c06a005068d8432fd16e06fe6a0 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Thu, 30 Oct 2014 15:10:27 +0000 Subject: [PATCH] YARN-2755. NM fails to clean up usercache_DEL_ dirs after YARN-661. Contributed by Siqi Li --- hadoop-yarn-project/CHANGES.txt | 3 + .../ResourceLocalizationService.java | 2 +- .../nodemanager/TestNodeManagerReboot.java | 90 +++++++++++++------ 3 files changed, 66 insertions(+), 29 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index f4e4afa9f8..0e06661973 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -801,6 +801,9 @@ Release 2.6.0 - UNRELEASED YARN-2769. Fixed the problem that timeline domain is not set in distributed shell AM when using shell_command on Windows. (Varun Vasudev via zjshen) + YARN-2755. NM fails to clean up usercache_DEL_ dirs after + YARN-661 (Siqi Li via jlowe) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java index 549d8e7b29..08ed3a1fb3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java @@ -1324,7 +1324,7 @@ private void cleanUpFilesPerUserDir(FileContext lfs, DeletionService del, RemoteIterator userDirStatus = lfs.listStatus(userDirPath); FileDeletionTask dependentDeletionTask = del.createFileDeletionTask(null, userDirPath, new Path[] {}); - if (userDirStatus != null) { + if (userDirStatus != null && userDirStatus.hasNext()) { List deletionTasks = new ArrayList(); while (userDirStatus.hasNext()) { FileStatus status = userDirStatus.next(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java index e69170e4a5..e9aea0ef6c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java @@ -35,7 +35,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; @@ -190,34 +192,9 @@ public Void run() throws YarnException, IOException { ResourceLocalizationService.NM_PRIVATE_DIR) > 0); // restart the NodeManager - nm.stop(); - nm = new MyNodeManager(); - nm.start(); - - numTries = 0; - while ((numOfLocalDirs(nmLocalDir.getAbsolutePath(), - ContainerLocalizer.USERCACHE) > 0 - || numOfLocalDirs(nmLocalDir.getAbsolutePath(), - ContainerLocalizer.FILECACHE) > 0 || numOfLocalDirs( - nmLocalDir.getAbsolutePath(), ResourceLocalizationService.NM_PRIVATE_DIR) > 0) - && numTries < MAX_TRIES) { - try { - Thread.sleep(500); - } catch (InterruptedException ex) { - // Do nothing - } - numTries++; - } - - Assert - .assertTrue( - "After NM reboots, all local files should be deleted", - numOfLocalDirs(nmLocalDir.getAbsolutePath(), - ContainerLocalizer.USERCACHE) == 0 - && numOfLocalDirs(nmLocalDir.getAbsolutePath(), - ContainerLocalizer.FILECACHE) == 0 - && numOfLocalDirs(nmLocalDir.getAbsolutePath(), - ResourceLocalizationService.NM_PRIVATE_DIR) == 0); + restartNM(MAX_TRIES); + checkNumOfLocalDirs(); + verify(delService, times(1)).delete( (String) isNull(), argThat(new PathInclude(ResourceLocalizationService.NM_PRIVATE_DIR @@ -230,8 +207,52 @@ && numOfLocalDirs(nmLocalDir.getAbsolutePath(), verify(delService, times(1)).scheduleFileDeletionTask( argThat(new FileDeletionInclude(null, ContainerLocalizer.USERCACHE + "_DEL_", new String[] {}))); + + // restart the NodeManager again + // this time usercache directory should be empty + restartNM(MAX_TRIES); + checkNumOfLocalDirs(); + } + private void restartNM(int maxTries) { + nm.stop(); + nm = new MyNodeManager(); + nm.start(); + + int numTries = 0; + while ((numOfLocalDirs(nmLocalDir.getAbsolutePath(), + ContainerLocalizer.USERCACHE) > 0 + || numOfLocalDirs(nmLocalDir.getAbsolutePath(), + ContainerLocalizer.FILECACHE) > 0 || numOfLocalDirs( + nmLocalDir.getAbsolutePath(), ResourceLocalizationService.NM_PRIVATE_DIR) > 0) + && numTries < maxTries) { + try { + Thread.sleep(500); + } catch (InterruptedException ex) { + // Do nothing + } + numTries++; + } + } + + private void checkNumOfLocalDirs() throws IOException { + Assert + .assertTrue( + "After NM reboots, all local files should be deleted", + numOfLocalDirs(nmLocalDir.getAbsolutePath(), + ContainerLocalizer.USERCACHE) == 0 + && numOfLocalDirs(nmLocalDir.getAbsolutePath(), + ContainerLocalizer.FILECACHE) == 0 + && numOfLocalDirs(nmLocalDir.getAbsolutePath(), + ResourceLocalizationService.NM_PRIVATE_DIR) == 0); + + Assert + .assertTrue( + "After NM reboots, usercache_DEL_* directory should be deleted", + numOfUsercacheDELDirs(nmLocalDir.getAbsolutePath()) == 0); + } + private int numOfLocalDirs(String localDir, String localSubDir) { File[] listOfFiles = new File(localDir, localSubDir).listFiles(); if (listOfFiles == null) { @@ -240,6 +261,19 @@ private int numOfLocalDirs(String localDir, String localSubDir) { return listOfFiles.length; } } + + private int numOfUsercacheDELDirs(String localDir) throws IOException { + int count = 0; + RemoteIterator fileStatus = localFS.listStatus(new Path(localDir)); + while (fileStatus.hasNext()) { + FileStatus status = fileStatus.next(); + if (status.getPath().getName().matches(".*" + + ContainerLocalizer.USERCACHE + "_DEL_.*")) { + count++; + } + } + return count; + } private void createFiles(String dir, String subDir, int numOfFiles) { for (int i = 0; i < numOfFiles; i++) {