From 91a96eaa534dbb27e81b6c24bbb8138200a80a83 Mon Sep 17 00:00:00 2001 From: cnauroth Date: Fri, 12 Feb 2016 15:50:10 -0800 Subject: [PATCH] HADOOP-12780. During WASB atomic rename handle crash when one directory has been renamed but not file under it. Contributed by Madhumita Chakraborty. --- .../hadoop-common/CHANGES.txt | 3 + .../fs/azure/AzureNativeFileSystemStore.java | 30 +++++++++ .../fs/azure/NativeAzureFileSystem.java | 8 +-- .../fs/azure/NativeFileSystemStore.java | 2 + .../azure/NativeAzureFileSystemBaseTest.java | 67 +++++++++++++++++++ 5 files changed, 106 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index e7974ebec7..56cb6943af 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1704,6 +1704,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12795. KMS does not log detailed stack trace for unexpected errors. (cnauroth) + HADOOP-12780. During atomic rename handle crash when one directory has been + renamed but not file under it. (Madhumita Chakraborty via cnauroth) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java index c8deb4654a..7ccefd6566 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java @@ -2563,6 +2563,36 @@ private void waitForCopyToComplete(CloudBlobWrapper blob, OperationContext opCon } } + /** + * Checks whether an explicit file/folder exists. + * This is used by redo of atomic rename. + * There was a bug(apache jira HADOOP-12780) during atomic rename if + * process crashes after an inner directory has been renamed but still + * there are file under that directory to be renamed then after the + * process comes again it tries to redo the renames. It checks whether + * the directory exists or not by calling filesystem.exist. + * But filesystem.Exists will treat that directory as implicit directory + * and return true as file exists under that directory. So It will try + * try to rename that directory and will fail as the corresponding blob + * does not exist. So this method explicitly checks for the blob. + */ + @Override + public boolean explicitFileExists(String key) throws AzureException { + CloudBlobWrapper blob; + try { + blob = getBlobReference(key); + if (null != blob && blob.exists(getInstrumentedContext())) { + return true; + } + + return false; + } catch (StorageException e) { + throw new AzureException(e); + } catch (URISyntaxException e) { + throw new AzureException(e); + } + } + /** * Changes the permission status on the given key. */ diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java index ed65184625..c160af8dab 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java @@ -558,13 +558,13 @@ private void finishSingleFileRename(String fileName) throws IOException { Path srcFile = fullPath(srcKey, fileName); Path dstFile = fullPath(dstKey, fileName); - boolean srcExists = fs.exists(srcFile); - boolean dstExists = fs.exists(dstFile); + String srcName = fs.pathToKey(srcFile); + String dstName = fs.pathToKey(dstFile); + boolean srcExists = fs.getStoreInterface().explicitFileExists(srcName); + boolean dstExists = fs.getStoreInterface().explicitFileExists(dstName); if(srcExists) { // Rename gets exclusive access (via a lease) for HBase write-ahead log // (WAL) file processing correctness. See the rename code for details. - String srcName = fs.pathToKey(srcFile); - String dstName = fs.pathToKey(dstFile); fs.getStoreInterface().rename(srcName, dstName, true, null); } else if (!srcExists && dstExists) { // The rename already finished, so do nothing. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java index f052b7f067..acdd3d6c07 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java @@ -109,4 +109,6 @@ void updateFolderLastModifiedTime(String key, Date lastModified, SelfRenewingLease acquireLease(String key) throws AzureException; DataOutputStream retrieveAppendStream(String key, int bufferSize) throws IOException; + + boolean explicitFileExists(String key) throws AzureException; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java index 78370986fd..60e5ec62d1 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java @@ -845,6 +845,73 @@ public void testRedoRenameFolderInFolderListing() throws IOException { assertTrue(fs.exists(new Path(inner2renamed, "file"))); } + /** + * There is a nested folder and file under the folder to be renamed + * and the process crashes after the nested folder has been renamed but not the file. + * then when you list the parent folder, pending renames should be redone + * Apache jira HADOOP-12780 + */ + @Test + public void testRedoRenameFolderRenameInProgress() throws IOException { + + // create original folder + String parent = "parent"; + Path parentFolder = new Path(parent); + assertTrue(fs.mkdirs(parentFolder)); + Path folderToBeRenamed = new Path(parentFolder, "folderToBeRenamed"); + assertTrue(fs.mkdirs(folderToBeRenamed)); + String innerFolderName = "innerFolder"; + Path inner = new Path(folderToBeRenamed, innerFolderName); + assertTrue(fs.mkdirs(inner)); + String innerFileName = "file"; + Path innerFile = new Path(inner, innerFileName); + assertTrue(fs.createNewFile(innerFile)); + + Path renamedFolder = new Path(parentFolder, "renamedFolder"); + + // propose (but don't do) the rename of innerFolder2 + Path home = fs.getHomeDirectory(); + String relativeHomeDir = getRelativePath(home.toString()); + NativeAzureFileSystem.FolderRenamePending pending = + new NativeAzureFileSystem.FolderRenamePending( + relativeHomeDir + "/" + folderToBeRenamed, + relativeHomeDir + "/" + renamedFolder, null, + (NativeAzureFileSystem) fs); + + // Create a rename-pending file and write rename information to it. + final String renamePendingStr = folderToBeRenamed + FolderRenamePending.SUFFIX; + Path renamePendingFile = new Path(renamePendingStr); + FSDataOutputStream out = fs.create(renamePendingFile, true); + assertTrue(out != null); + writeString(out, pending.makeRenamePendingFileContents()); + + // Rename inner folder to simulate the scenario where rename has started and + // only one directory has been renamed but not the files under it + ((NativeAzureFileSystem) fs).getStoreInterface().rename( + relativeHomeDir + "/" +inner, relativeHomeDir + "/" +renamedFolder + "/" + innerFolderName , true, null); + + // Instead of using fs.exist use store.explicitFileExists because fs.exist will return true + // even if directory has been renamed, but there are still file under that directory + assertFalse(((NativeAzureFileSystem) fs).getStoreInterface(). + explicitFileExists(relativeHomeDir + "/" + inner)); // verify the explicit inner folder is gone + assertTrue(((NativeAzureFileSystem) fs).getStoreInterface(). + explicitFileExists(relativeHomeDir + "/" + innerFile)); // verify inner file is present + + // Redo the rename operation based on the contents of the + // -RenamePending.json file. Trigger the redo by checking for existence of + // the original folder. It must appear to not exist. + FileStatus[] listed = fs.listStatus(parentFolder); + assertEquals(1, listed.length); + assertTrue(listed[0].isDirectory()); + + // The rename pending file is not a directory, so at this point we know the + // redo has been done. + assertFalse(fs.exists(inner)); // verify original folder is gone + assertFalse(fs.exists(innerFile)); // verify original file is gone + assertTrue(fs.exists(renamedFolder)); // verify the target is there + assertTrue(fs.exists(new Path(renamedFolder, innerFolderName + "/" + innerFileName))); + } + /** * Test the situation when the rename metadata file is empty * i.e. it is created but not written yet. In that case in next rename