HDFS-15961. standby namenode failed to start when ordered snapshot deletion is enabled while having snapshottable directories (#2881)
This commit is contained in:
parent
64264d726d
commit
ef13f8ad6b
@ -124,7 +124,12 @@
|
|||||||
import org.apache.hadoop.hdfs.server.namenode.metrics.ReplicatedBlocksMBean;
|
import org.apache.hadoop.hdfs.server.namenode.metrics.ReplicatedBlocksMBean;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
|
import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
|
||||||
import org.apache.hadoop.ipc.ObserverRetryOnActiveException;
|
import org.apache.hadoop.ipc.ObserverRetryOnActiveException;
|
||||||
import org.apache.hadoop.util.*;
|
import org.apache.hadoop.util.Time;
|
||||||
|
import org.apache.hadoop.util.Daemon;
|
||||||
|
import org.apache.hadoop.util.DataChecksum;
|
||||||
|
import org.apache.hadoop.util.ReflectionUtils;
|
||||||
|
import org.apache.hadoop.util.StringUtils;
|
||||||
|
import org.apache.hadoop.util.VersionInfo;
|
||||||
|
|
||||||
import static org.apache.hadoop.util.Time.now;
|
import static org.apache.hadoop.util.Time.now;
|
||||||
import static org.apache.hadoop.util.Time.monotonicNow;
|
import static org.apache.hadoop.util.Time.monotonicNow;
|
||||||
@ -8560,25 +8565,36 @@ void checkAccess(String src, FsAction mode) throws IOException {
|
|||||||
/**
|
/**
|
||||||
* Check if snapshot roots are created for all existing snapshottable
|
* Check if snapshot roots are created for all existing snapshottable
|
||||||
* directories. Create them if not.
|
* directories. Create them if not.
|
||||||
|
* Only the active NameNode needs to execute this in HA setup once it is out
|
||||||
|
* of safe mode.
|
||||||
|
*
|
||||||
|
* The function gets called while exiting safe mode or post starting the
|
||||||
|
* services in Active NameNode, but comes into effect post whichever event
|
||||||
|
* happens later.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void checkAndProvisionSnapshotTrashRoots() {
|
public synchronized void checkAndProvisionSnapshotTrashRoots() {
|
||||||
if (isSnapshotTrashRootEnabled) {
|
if (isSnapshotTrashRootEnabled && (haEnabled && inActiveState()
|
||||||
|
|| !haEnabled) && !blockManager.isInSafeMode()) {
|
||||||
|
SnapshottableDirectoryStatus dirStatus = null;
|
||||||
try {
|
try {
|
||||||
SnapshottableDirectoryStatus[] dirStatusList =
|
SnapshottableDirectoryStatus[] dirStatusList =
|
||||||
getSnapshottableDirListing();
|
getSnapshottableDirListing();
|
||||||
if (dirStatusList == null) {
|
if (dirStatusList == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (SnapshottableDirectoryStatus dirStatus : dirStatusList) {
|
for (SnapshottableDirectoryStatus status : dirStatusList) {
|
||||||
|
dirStatus = status;
|
||||||
String currDir = dirStatus.getFullPath().toString();
|
String currDir = dirStatus.getFullPath().toString();
|
||||||
if (!currDir.endsWith(Path.SEPARATOR)) {
|
if (!currDir.endsWith(Path.SEPARATOR)) {
|
||||||
currDir += Path.SEPARATOR;
|
currDir += Path.SEPARATOR;
|
||||||
}
|
}
|
||||||
String trashPath = currDir + FileSystem.TRASH_PREFIX;
|
String trashPath = currDir + FileSystem.TRASH_PREFIX;
|
||||||
HdfsFileStatus fileStatus = getFileInfo(trashPath, false, false, false);
|
HdfsFileStatus fileStatus =
|
||||||
|
getFileInfo(trashPath, false, false, false);
|
||||||
if (fileStatus == null) {
|
if (fileStatus == null) {
|
||||||
LOG.info("Trash doesn't exist for snapshottable directory {}. " + "Creating trash at {}", currDir, trashPath);
|
LOG.info("Trash doesn't exist for snapshottable directory {}. "
|
||||||
|
+ "Creating trash at {}", currDir, trashPath);
|
||||||
PermissionStatus permissionStatus =
|
PermissionStatus permissionStatus =
|
||||||
new PermissionStatus(getRemoteUser().getShortUserName(), null,
|
new PermissionStatus(getRemoteUser().getShortUserName(), null,
|
||||||
SHARED_TRASH_PERMISSION);
|
SHARED_TRASH_PERMISSION);
|
||||||
@ -8586,12 +8602,13 @@ public void checkAndProvisionSnapshotTrashRoots() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
final String msg =
|
if (dirStatus == null) {
|
||||||
"Could not provision Trash directory for existing "
|
LOG.error("Failed to get snapshottable directory list", e);
|
||||||
+ "snapshottable directories. Exiting Namenode.";
|
} else {
|
||||||
ExitUtil.terminate(1, msg);
|
LOG.error("Could not provision Trash directory for existing "
|
||||||
|
+ "snapshottable directory {}", dirStatus, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2021,6 +2021,7 @@ public HAState getState() {
|
|||||||
public void startActiveServices() throws IOException {
|
public void startActiveServices() throws IOException {
|
||||||
try {
|
try {
|
||||||
namesystem.startActiveServices();
|
namesystem.startActiveServices();
|
||||||
|
namesystem.checkAndProvisionSnapshotTrashRoots();
|
||||||
startTrashEmptier(getConf());
|
startTrashEmptier(getConf());
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
doImmediateShutdown(t);
|
doImmediateShutdown(t);
|
||||||
|
@ -98,6 +98,7 @@ public void setupCluster() throws Exception {
|
|||||||
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
|
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
|
||||||
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
|
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
|
||||||
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
|
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
|
||||||
|
conf.setBoolean("dfs.namenode.snapshot.trashroot.enabled", false);
|
||||||
|
|
||||||
cluster = new MiniDFSCluster.Builder(conf)
|
cluster = new MiniDFSCluster.Builder(conf)
|
||||||
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
||||||
@ -909,6 +910,42 @@ public Boolean get() {
|
|||||||
assertSafeMode(nn1, 3, 3, 3, 0);
|
assertSafeMode(nn1, 3, 3, 3, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNameNodeCreateSnapshotTrashRootOnHASetup() throws Exception {
|
||||||
|
DistributedFileSystem dfs = cluster.getFileSystem(0);
|
||||||
|
final Path testDir = new Path("/disallowss/test2/");
|
||||||
|
final Path file0path = new Path(testDir, "file-0");
|
||||||
|
dfs.create(file0path).close();
|
||||||
|
dfs.allowSnapshot(testDir);
|
||||||
|
// .Trash won't be created right now since snapshot trash is disabled
|
||||||
|
final Path trashRoot = new Path(testDir, FileSystem.TRASH_PREFIX);
|
||||||
|
assertFalse(dfs.exists(trashRoot));
|
||||||
|
// Set dfs.namenode.snapshot.trashroot.enabled=true
|
||||||
|
cluster.getNameNode(0).getConf()
|
||||||
|
.setBoolean("dfs.namenode.snapshot.trashroot.enabled", true);
|
||||||
|
cluster.getNameNode(1).getConf()
|
||||||
|
.setBoolean("dfs.namenode.snapshot.trashroot.enabled", true);
|
||||||
|
restartActive();
|
||||||
|
cluster.transitionToActive(1);
|
||||||
|
dfs = cluster.getFileSystem(1);
|
||||||
|
// Make sure .Trash path does not exist yet as on NN1 trash root is not
|
||||||
|
// enabled
|
||||||
|
assertFalse(dfs.exists(trashRoot));
|
||||||
|
cluster.transitionToStandby(1);
|
||||||
|
cluster.transitionToActive(0);
|
||||||
|
dfs = cluster.getFileSystem(0);
|
||||||
|
// Check .Trash existence, should be created now
|
||||||
|
assertTrue(dfs.exists(trashRoot));
|
||||||
|
assertFalse(cluster.getNameNode(0).isInSafeMode());
|
||||||
|
restartStandby();
|
||||||
|
// Ensure Standby namenode is up and running
|
||||||
|
assertTrue(cluster.getNameNode(1).isStandbyState());
|
||||||
|
// Cleanup
|
||||||
|
dfs.delete(trashRoot, true);
|
||||||
|
dfs.disallowSnapshot(testDir);
|
||||||
|
dfs.delete(testDir, true);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test transition to active when namenode in safemode.
|
* Test transition to active when namenode in safemode.
|
||||||
*
|
*
|
||||||
|
Loading…
Reference in New Issue
Block a user