HADOOP-18801. Delete path directly when it can not be parsed in trash. (#5744). Contributed by farmmamba.

Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
hfutatzhanghb 2023-07-16 12:20:46 +08:00 committed by GitHub
parent 8dd9c874e1
commit b95595158f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 83 additions and 2 deletions

View File

@ -214,6 +214,15 @@ public class CommonConfigurationKeysPublic {
public static final String FS_TRASH_INTERVAL_KEY = "fs.trash.interval";
/** Default value for FS_TRASH_INTERVAL_KEY */
public static final long FS_TRASH_INTERVAL_DEFAULT = 0;
/**
* @see
* <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">
* core-default.xml</a>
*/
public static final String FS_TRASH_CLEAN_TRASHROOT_ENABLE_KEY =
"fs.trash.clean.trashroot.enable";
/** Default value for FS_TRASH_CLEAN_TRASHROOT_ENABLE_KEY. */
public static final boolean FS_TRASH_CLEAN_TRASHROOT_ENABLE_DEFAULT = false;
/**
* @see
* <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">

View File

@ -19,6 +19,8 @@
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_CHECKPOINT_INTERVAL_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_CHECKPOINT_INTERVAL_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_CLEAN_TRASHROOT_ENABLE_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_CLEAN_TRASHROOT_ENABLE_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
@ -70,6 +72,8 @@ public class TrashPolicyDefault extends TrashPolicy {
private long emptierInterval;
private boolean cleanNonCheckpointUnderTrashRoot;
public TrashPolicyDefault() { }
private TrashPolicyDefault(FileSystem fs, Configuration conf)
@ -90,6 +94,8 @@ public void initialize(Configuration conf, FileSystem fs, Path home) {
this.emptierInterval = (long)(conf.getFloat(
FS_TRASH_CHECKPOINT_INTERVAL_KEY, FS_TRASH_CHECKPOINT_INTERVAL_DEFAULT)
* MSECS_PER_MINUTE);
this.cleanNonCheckpointUnderTrashRoot = conf.getBoolean(
FS_TRASH_CLEAN_TRASHROOT_ENABLE_KEY, FS_TRASH_CLEAN_TRASHROOT_ENABLE_DEFAULT);
}
@Override
@ -101,6 +107,8 @@ public void initialize(Configuration conf, FileSystem fs) {
this.emptierInterval = (long)(conf.getFloat(
FS_TRASH_CHECKPOINT_INTERVAL_KEY, FS_TRASH_CHECKPOINT_INTERVAL_DEFAULT)
* MSECS_PER_MINUTE);
this.cleanNonCheckpointUnderTrashRoot = conf.getBoolean(
FS_TRASH_CLEAN_TRASHROOT_ENABLE_KEY, FS_TRASH_CLEAN_TRASHROOT_ENABLE_DEFAULT);
if (deletionInterval < 0) {
LOG.warn("Invalid value {} for deletion interval,"
+ " deletion interaval can not be negative."
@ -374,9 +382,15 @@ private void deleteCheckpoint(Path trashRoot, boolean deleteImmediately)
try {
time = getTimeFromCheckpoint(name);
} catch (ParseException e) {
if (cleanNonCheckpointUnderTrashRoot) {
fs.delete(path, true);
LOG.warn("Unexpected item in trash: " + dir + ". Deleting.");
continue;
} else {
LOG.warn("Unexpected item in trash: " + dir + ". Ignoring.");
continue;
}
}
if (((now - deletionInterval) > time) || deleteImmediately) {
if (fs.delete(path, true)) {

View File

@ -974,6 +974,14 @@
</description>
</property>
<property>
<name>fs.trash.clean.trashroot.enable</name>
<value>false</value>
<description>Whether clean some directories and files
in Trash home which are not under checkpoint directory.
</description>
</property>
<property>
<name>fs.protected.directories</name>
<value></value>

View File

@ -32,6 +32,7 @@
import java.util.Random;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;
import org.junit.After;
import org.junit.Before;
@ -786,6 +787,55 @@ public void testTrashEmptier() throws Exception {
emptierThread.join();
}
/**
* Test trash emptier can delete non-checkpoint dir or not.
* @throws Exception
*/
@Test()
public void testTrashEmptierCleanDirNotInCheckpointDir() throws Exception {
Configuration conf = new Configuration();
// Trash with 12 second deletes and 6 seconds checkpoints.
conf.set(FS_TRASH_INTERVAL_KEY, "0.2"); // 12 seconds
conf.setClass("fs.file.impl", TestLFS.class, FileSystem.class);
conf.set(FS_TRASH_CHECKPOINT_INTERVAL_KEY, "0.1"); // 6 seconds
conf.setBoolean(FS_TRASH_CLEAN_TRASHROOT_ENABLE_KEY, true);
FileSystem fs = FileSystem.getLocal(conf);
conf.set("fs.default.name", fs.getUri().toString());
Trash trash = new Trash(conf);
// Start Emptier in background.
Runnable emptier = trash.getEmptier();
Thread emptierThread = new Thread(emptier);
emptierThread.start();
FsShell shell = new FsShell();
shell.setConf(conf);
shell.init();
// Make sure the .Trash dir existed.
mkdir(fs, shell.getCurrentTrashDir());
assertTrue(fs.exists(shell.getCurrentTrashDir()));
// Create a directory under .Trash directly.
Path myPath = new Path(shell.getCurrentTrashDir().getParent(), "test_dirs");
mkdir(fs, myPath);
assertTrue(fs.exists(myPath));
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
try {
return !fs.exists(myPath);
} catch (IOException e) {
// Do nothing.
}
return false;
}
}, 6000, 60000);
emptierThread.interrupt();
emptierThread.join();
}
@After
public void tearDown() throws IOException {
File trashDir = new File(TEST_DIR.toUri().getPath());