HDFS-15961. standby namenode failed to start when ordered snapshot deletion is enabled while having snapshottable directories (#2881)

This commit is contained in:
bshashikant 2021-04-27 09:15:06 +05:30 committed by GitHub
parent 64264d726d
commit ef13f8ad6b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 66 additions and 11 deletions

View File

@ -124,7 +124,12 @@
import org.apache.hadoop.hdfs.server.namenode.metrics.ReplicatedBlocksMBean; import org.apache.hadoop.hdfs.server.namenode.metrics.ReplicatedBlocksMBean;
import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
import org.apache.hadoop.ipc.ObserverRetryOnActiveException; import org.apache.hadoop.ipc.ObserverRetryOnActiveException;
import org.apache.hadoop.util.*; import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.VersionInfo;
import static org.apache.hadoop.util.Time.now; import static org.apache.hadoop.util.Time.now;
import static org.apache.hadoop.util.Time.monotonicNow; import static org.apache.hadoop.util.Time.monotonicNow;
@ -8560,25 +8565,36 @@ void checkAccess(String src, FsAction mode) throws IOException {
/** /**
* Check if snapshot roots are created for all existing snapshottable * Check if snapshot roots are created for all existing snapshottable
* directories. Create them if not. * directories. Create them if not.
* Only the active NameNode needs to execute this in HA setup once it is out
* of safe mode.
*
* The function gets called while exiting safe mode or post starting the
* services in Active NameNode, but comes into effect post whichever event
* happens later.
*/ */
@Override @Override
public void checkAndProvisionSnapshotTrashRoots() { public synchronized void checkAndProvisionSnapshotTrashRoots() {
if (isSnapshotTrashRootEnabled) { if (isSnapshotTrashRootEnabled && (haEnabled && inActiveState()
|| !haEnabled) && !blockManager.isInSafeMode()) {
SnapshottableDirectoryStatus dirStatus = null;
try { try {
SnapshottableDirectoryStatus[] dirStatusList = SnapshottableDirectoryStatus[] dirStatusList =
getSnapshottableDirListing(); getSnapshottableDirListing();
if (dirStatusList == null) { if (dirStatusList == null) {
return; return;
} }
for (SnapshottableDirectoryStatus dirStatus : dirStatusList) { for (SnapshottableDirectoryStatus status : dirStatusList) {
dirStatus = status;
String currDir = dirStatus.getFullPath().toString(); String currDir = dirStatus.getFullPath().toString();
if (!currDir.endsWith(Path.SEPARATOR)) { if (!currDir.endsWith(Path.SEPARATOR)) {
currDir += Path.SEPARATOR; currDir += Path.SEPARATOR;
} }
String trashPath = currDir + FileSystem.TRASH_PREFIX; String trashPath = currDir + FileSystem.TRASH_PREFIX;
HdfsFileStatus fileStatus = getFileInfo(trashPath, false, false, false); HdfsFileStatus fileStatus =
getFileInfo(trashPath, false, false, false);
if (fileStatus == null) { if (fileStatus == null) {
LOG.info("Trash doesn't exist for snapshottable directory {}. " + "Creating trash at {}", currDir, trashPath); LOG.info("Trash doesn't exist for snapshottable directory {}. "
+ "Creating trash at {}", currDir, trashPath);
PermissionStatus permissionStatus = PermissionStatus permissionStatus =
new PermissionStatus(getRemoteUser().getShortUserName(), null, new PermissionStatus(getRemoteUser().getShortUserName(), null,
SHARED_TRASH_PERMISSION); SHARED_TRASH_PERMISSION);
@ -8586,12 +8602,13 @@ public void checkAndProvisionSnapshotTrashRoots() {
} }
} }
} catch (IOException e) { } catch (IOException e) {
final String msg = if (dirStatus == null) {
"Could not provision Trash directory for existing " LOG.error("Failed to get snapshottable directory list", e);
+ "snapshottable directories. Exiting Namenode."; } else {
ExitUtil.terminate(1, msg); LOG.error("Could not provision Trash directory for existing "
+ "snapshottable directory {}", dirStatus, e);
}
} }
} }
} }

View File

@ -2021,6 +2021,7 @@ public HAState getState() {
public void startActiveServices() throws IOException { public void startActiveServices() throws IOException {
try { try {
namesystem.startActiveServices(); namesystem.startActiveServices();
namesystem.checkAndProvisionSnapshotTrashRoots();
startTrashEmptier(getConf()); startTrashEmptier(getConf());
} catch (Throwable t) { } catch (Throwable t) {
doImmediateShutdown(t); doImmediateShutdown(t);

View File

@ -98,6 +98,7 @@ public void setupCluster() throws Exception {
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
conf.setBoolean("dfs.namenode.snapshot.trashroot.enabled", false);
cluster = new MiniDFSCluster.Builder(conf) cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(MiniDFSNNTopology.simpleHATopology()) .nnTopology(MiniDFSNNTopology.simpleHATopology())
@ -909,6 +910,42 @@ public Boolean get() {
assertSafeMode(nn1, 3, 3, 3, 0); assertSafeMode(nn1, 3, 3, 3, 0);
} }
/**
 * Verifies snapshot trash root provisioning across HA state transitions
 * (HDFS-15961).
 *
 * The test makes a directory snapshottable while the cluster is set up with
 * dfs.namenode.snapshot.trashroot.enabled=false, flips that flag to true in
 * both NameNodes' configurations, and then checks that:
 * <ul>
 *   <li>the trash root is still absent while NN1 is active,</li>
 *   <li>the trash root exists once NN0 has been made active again and NN0
 *       is not in safe mode,</li>
 *   <li>NN1 reports standby state after restartStandby().</li>
 * </ul>
 *
 * @throws Exception if any file system or cluster operation fails
 */
@Test
public void testNameNodeCreateSnapshotTrashRootOnHASetup() throws Exception {
// Work through NN0's file system first.
DistributedFileSystem dfs = cluster.getFileSystem(0);
final Path testDir = new Path("/disallowss/test2/");
final Path file0path = new Path(testDir, "file-0");
// Creating the file also brings testDir into existence before it is made
// snapshottable.
dfs.create(file0path).close();
dfs.allowSnapshot(testDir);
// .Trash won't be created right now since snapshot trash is disabled
final Path trashRoot = new Path(testDir, FileSystem.TRASH_PREFIX);
assertFalse(dfs.exists(trashRoot));
// Set dfs.namenode.snapshot.trashroot.enabled=true
// The flag is changed on the Configuration objects of both NameNodes.
// NOTE(review): presumably a NameNode only picks the new value up when it
// is restarted - confirm against where the flag is read.
cluster.getNameNode(0).getConf()
.setBoolean("dfs.namenode.snapshot.trashroot.enabled", true);
cluster.getNameNode(1).getConf()
.setBoolean("dfs.namenode.snapshot.trashroot.enabled", true);
// NOTE(review): restartActive() is defined elsewhere in this test class;
// it presumably restarts NN0 and leaves it in standby state - confirm.
restartActive();
cluster.transitionToActive(1);
dfs = cluster.getFileSystem(1);
// Make sure .Trash path does not exist yet as on NN1 trash root is not
// enabled
// NOTE(review): NN1 is not restarted by this method before this point,
// which is presumably why it still behaves as if the flag were false.
assertFalse(dfs.exists(trashRoot));
// Hand the active role back to NN0.
cluster.transitionToStandby(1);
cluster.transitionToActive(0);
dfs = cluster.getFileSystem(0);
// Check .Trash existence, should be created now
assertTrue(dfs.exists(trashRoot));
assertFalse(cluster.getNameNode(0).isInSafeMode());
// NOTE(review): restartStandby() is defined elsewhere in this test class;
// it presumably restarts NN1 - confirm.
restartStandby();
// Ensure Standby namenode is up and running
assertTrue(cluster.getNameNode(1).isStandbyState());
// Cleanup
// The trash root is deleted before disallowSnapshot(); presumably
// disallowing is rejected while it still exists - TODO confirm.
dfs.delete(trashRoot, true);
dfs.disallowSnapshot(testDir);
dfs.delete(testDir, true);
}
/** /**
* Test transition to active when namenode in safemode. * Test transition to active when namenode in safemode.
* *