HADOOP-16424. S3Guard fsck: Check internal consistency of the MetadataStore (#1691). Contributed by Gabor Bota.
parent d4bde134e3
commit 875a3e97dd
@@ -24,12 +24,21 @@
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Queue;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;

+import com.amazonaws.services.dynamodbv2.document.Item;
+import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
+import com.amazonaws.services.dynamodbv2.document.ScanOutcome;
+import com.amazonaws.services.dynamodbv2.document.Table;
+import com.amazonaws.services.dynamodbv2.document.internal.IteratorSupport;
+import com.amazonaws.services.dynamodbv2.document.spec.GetItemSpec;
+import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
 import com.google.common.base.Preconditions;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -38,10 +47,14 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.s3a.S3AFileStatus;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.Tristate;
 import org.apache.hadoop.util.StopWatch;

 import static java.util.stream.Collectors.toList;
 import static java.util.stream.Collectors.toSet;
+import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.itemToPathMetadata;
+import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.pathToKey;
+import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.pathToParentKey;

 /**
  * Main class for the FSCK factored out from S3GuardTool
@@ -51,6 +64,7 @@
  * Functions:
  * <ul>
  * <li>Checking metadata consistency between S3 and metadatastore</li>
+ * <li>Checking the internal metadata consistency</li>
  * </ul>
  */
 public class S3GuardFsck {
@@ -306,7 +320,7 @@ protected ComparePair compareFileStatusToPathMetadata(
     }

     if(msPathMetadata.getFileStatus().getVersionId() == null
-        || s3FileStatus.getVersionId() == null ) {
+        || s3FileStatus.getVersionId() == null) {
       LOG.debug("Missing versionIDs skipped. A HEAD request is "
           + "required for each object to get the versionID.");
     } else if(!s3FileStatus.getVersionId().equals(msFileStatus.getVersionId())) {
@@ -349,7 +363,11 @@ public static class ComparePair {
       this.msPathMetadata = pm;
       this.s3DirListing = null;
       this.msDirListing = null;
-      this.path = status.getPath();
+      if (status != null) {
+        this.path = status.getPath();
+      } else {
+        this.path = pm.getFileStatus().getPath();
+      }
     }

     ComparePair(List<FileStatus> s3DirListing, DirListingMetadata msDirListing) {
@@ -396,6 +414,230 @@ public Path getPath() {
     }
   }

+  /**
+   * Check the DynamoDB metadatastore internally for consistency.
+   * <pre>
+   * Tasks to do here:
+   *  - find orphan entries (entries without a parent).
+   *  - find if a file's parent is not a directory (so the parent is a file).
+   *  - find entries where the parent is a tombstone.
+   *  - warn: no lastUpdated field.
+   * </pre>
+   */
+  public List<ComparePair> checkDdbInternalConsistency(Path basePath)
+      throws IOException {
+    Preconditions.checkArgument(basePath.isAbsolute(), "path must be absolute");
+
+    List<ComparePair> comparePairs = new ArrayList<>();
+    String rootStr = basePath.toString();
+    LOG.info("Root for internal consistency check: {}", rootStr);
+    StopWatch stopwatch = new StopWatch();
+    stopwatch.start();
+
+    final Table table = metadataStore.getTable();
+    final String username = metadataStore.getUsername();
+    DDBTree ddbTree = new DDBTree();
+
+    /*
+     * I. Root node construction
+     * - If the root node is the real bucket root, a node is constructed
+     *   instead of doing a query to the ddb, because the bucket root is not
+     *   stored.
+     * - If the root node is not a real bucket root then the entry is queried
+     *   from the ddb and constructed from the result.
+     */
+
+    DDBPathMetadata baseMeta;
+
+    if (!basePath.isRoot()) {
+      PrimaryKey rootKey = pathToKey(basePath);
+      final GetItemSpec spec = new GetItemSpec()
+          .withPrimaryKey(rootKey)
+          .withConsistentRead(true);
+      final Item baseItem = table.getItem(spec);
+      baseMeta = itemToPathMetadata(baseItem, username);
+
+      if (baseMeta == null) {
+        throw new FileNotFoundException(
+            "Base element metadata is null. " +
+                "This means the base path element is missing, or wrong path was " +
+                "passed as base path to the internal ddb consistency checker.");
+      }
+    } else {
+      baseMeta = new DDBPathMetadata(
+          new S3AFileStatus(Tristate.UNKNOWN, basePath, username)
+      );
+    }
+
+    DDBTreeNode root = new DDBTreeNode(baseMeta);
+    ddbTree.addNode(root);
+    ddbTree.setRoot(root);
+
+    /*
+     * II. Build and check the descendant tree:
+     * 1. Query all nodes where the prefix is the given root, and put them in
+     *    the tree.
+     * 2. Check connectivity: check if each parent is in the hashmap.
+     *    - This is done in O(n): we only need to find the parent based on the
+     *      path with a hashmap lookup.
+     *    - Do a test if the graph is connected - if the parent is not in the
+     *      hashmap, we found an orphan entry.
+     * 3. Test the elements for errors:
+     *    - File is a parent of a file.
+     *    - Entries where the parent is tombstoned but the entries are not.
+     *    - Warn on no lastUpdated field.
+     */
+    ExpressionSpecBuilder builder = new ExpressionSpecBuilder();
+    builder.withCondition(
+        ExpressionSpecBuilder.S("parent")
+            .beginsWith(pathToParentKey(basePath))
+    );
+    final IteratorSupport<Item, ScanOutcome> resultIterator = table.scan(
+        builder.buildForScan()).iterator();
+    resultIterator.forEachRemaining(item -> {
+      final DDBPathMetadata pmd = itemToPathMetadata(item, username);
+      DDBTreeNode ddbTreeNode = new DDBTreeNode(pmd);
+      ddbTree.addNode(ddbTreeNode);
+    });
+
+    LOG.debug("Root: {}", ddbTree.getRoot());
+
+    for (Map.Entry<Path, DDBTreeNode> entry : ddbTree.getContentMap().entrySet()) {
+      final DDBTreeNode node = entry.getValue();
+      final ComparePair pair = new ComparePair(null, node.val);
+      // let's skip the root node when checking.
+      if (node.getVal().getFileStatus().getPath().isRoot()) {
+        continue;
+      }
+
+      if (node.getVal().getLastUpdated() == 0) {
+        pair.violations.add(Violation.NO_LASTUPDATED_FIELD);
+      }
+
+      // skip further checking the basenode which is not the actual bucket root.
+      if (node.equals(ddbTree.getRoot())) {
+        continue;
+      }
+
+      final Path parent = node.getFileStatus().getPath().getParent();
+      final DDBTreeNode parentNode = ddbTree.getContentMap().get(parent);
+      if (parentNode == null) {
+        pair.violations.add(Violation.ORPHAN_DDB_ENTRY);
+      } else {
+        if (!node.isTombstoned() && !parentNode.isDirectory()) {
+          pair.violations.add(Violation.PARENT_IS_A_FILE);
+        }
+        if (!node.isTombstoned() && parentNode.isTombstoned()) {
+          pair.violations.add(Violation.PARENT_TOMBSTONED);
+        }
+      }
+
+      if (!pair.violations.isEmpty()) {
+        comparePairs.add(pair);
+      }
+
+      node.setParent(parentNode);
+    }
+
+    // Create a handler and handle each violated pair.
+    S3GuardFsckViolationHandler handler =
+        new S3GuardFsckViolationHandler(rawFS, metadataStore);
+    comparePairs.forEach(handler::handle);
+
+    stopwatch.stop();
+    LOG.info("Total scan time: {}s", stopwatch.now(TimeUnit.SECONDS));
+    LOG.info("Scanned entries: {}", ddbTree.contentMap.size());
+
+    return comparePairs;
+  }
+
+  /**
+   * DDBTree is the tree that represents the structure of items in the DynamoDB.
+   */
+  public static class DDBTree {
+    private final Map<Path, DDBTreeNode> contentMap = new HashMap<>();
+    private DDBTreeNode root;
+
+    public DDBTree() {
+    }
+
+    public Map<Path, DDBTreeNode> getContentMap() {
+      return contentMap;
+    }
+
+    public DDBTreeNode getRoot() {
+      return root;
+    }
+
+    public void setRoot(DDBTreeNode root) {
+      this.root = root;
+    }
+
+    public void addNode(DDBTreeNode pm) {
+      contentMap.put(pm.getVal().getFileStatus().getPath(), pm);
+    }
+
+    @Override
+    public String toString() {
+      return "DDBTree{" +
+          "contentMap=" + contentMap +
+          ", root=" + root +
+          '}';
+    }
+  }
+
+  /**
+   * Tree node for DDBTree.
+   */
+  private static final class DDBTreeNode {
+    private final DDBPathMetadata val;
+    private DDBTreeNode parent;
+    private final List<DDBPathMetadata> children;
+
+    private DDBTreeNode(DDBPathMetadata pm) {
+      this.val = pm;
+      this.parent = null;
+      this.children = new ArrayList<>();
+    }
+
+    public DDBPathMetadata getVal() {
+      return val;
+    }
+
+    public DDBTreeNode getParent() {
+      return parent;
+    }
+
+    public void setParent(DDBTreeNode parent) {
+      this.parent = parent;
+    }
+
+    public List<DDBPathMetadata> getChildren() {
+      return children;
+    }
+
+    public boolean isDirectory() {
+      return val.getFileStatus().isDirectory();
+    }
+
+    public S3AFileStatus getFileStatus() {
+      return val.getFileStatus();
+    }
+
+    public boolean isTombstoned() {
+      return val.isDeleted();
+    }
+
+    @Override
+    public String toString() {
+      return "DDBTreeNode{" +
+          "val=" + val +
+          ", parent=" + parent +
+          ", children=" + children +
+          '}';
+    }
+  }
+
 /**
  * Violation with severity and the handler.
  * Defines the severity of the violation between 0-2
@@ -437,7 +679,7 @@ public enum Violation {
     AUTHORITATIVE_DIRECTORY_CONTENT_MISMATCH(1,
         S3GuardFsckViolationHandler.AuthDirContentMismatch.class),
     /**
-     * An entry in the MS is tombstoned, but the object is not deleted on S3
+     * An entry in the MS is tombstoned, but the object is not deleted on S3.
      */
     TOMBSTONED_IN_MS_NOT_DELETED_IN_S3(0,
         S3GuardFsckViolationHandler.TombstonedInMsNotDeletedInS3.class),
@@ -462,7 +704,16 @@ public enum Violation {
      * Don't worry too much if we don't have an etag.
      */
     NO_ETAG(2,
-        S3GuardFsckViolationHandler.NoEtag.class);
+        S3GuardFsckViolationHandler.NoEtag.class),
+    /**
+     * The entry does not have a parent in ddb.
+     */
+    ORPHAN_DDB_ENTRY(0, S3GuardFsckViolationHandler.OrphanDDBEntry.class),
+    /**
+     * The entry's lastUpdated field is empty.
+     */
+    NO_LASTUPDATED_FIELD(2,
+        S3GuardFsckViolationHandler.NoLastUpdatedField.class);

     private final int severity;
     private final Class<? extends S3GuardFsckViolationHandler.ViolationHandler> handler;
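For orientation, here is a minimal sketch of how the new internal check can be driven programmatically, mirroring how the tests and the `fsck` tool below wire it up. It assumes the code lives in the `org.apache.hadoop.fs.s3a.s3guard` package so the S3Guard classes resolve, that a raw (unguarded) `S3AFileSystem` and the `DynamoDBMetadataStore` of the same bucket are already initialized, and that the bucket and prefix are placeholders:

```java
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileSystem;

/** Sketch only: drive the DynamoDB internal consistency check. */
final class InternalConsistencyCheckSketch {

  static void check(S3AFileSystem rawFs, DynamoDBMetadataStore ms)
      throws IOException {
    // The fsck helper takes the raw filesystem and the DynamoDB store.
    S3GuardFsck fsck = new S3GuardFsck(rawFs, ms);

    // Scan the table under the prefix and collect violations: orphan
    // entries, parents that are files, tombstoned parents, and missing
    // lastUpdated fields. Violations are also logged by the handler.
    List<S3GuardFsck.ComparePair> violations =
        fsck.checkDdbInternalConsistency(
            new Path("s3a://example-bucket/path_prefix/"));

    // A non-empty list means at least one internal inconsistency was found.
    System.out.println("Internal consistency violations: " + violations.size());
  }
}
```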
@@ -343,4 +343,34 @@ public String getError() {
       return "The entry for the path is tombstoned in the MS.";
     }
   }
+
+  /**
+   * The violation handler when there's no parent in the MetadataStore.
+   */
+  public static class OrphanDDBEntry extends ViolationHandler {
+
+    public OrphanDDBEntry(S3GuardFsck.ComparePair comparePair) {
+      super(comparePair);
+    }
+
+    @Override
+    public String getError() {
+      return "The DDB entry is orphan - there is no parent in the MS.";
+    }
+  }
+
+  /**
+   * The violation handler when there's no last updated field for the entry.
+   */
+  public static class NoLastUpdatedField extends ViolationHandler {
+
+    public NoLastUpdatedField(S3GuardFsck.ComparePair comparePair) {
+      super(comparePair);
+    }
+
+    @Override
+    public String getError() {
+      return "No lastUpdated field provided for the entry.";
+    }
+  }
 }
@@ -1544,6 +1544,7 @@ private void vprintln(PrintStream out, String format, Object...
    */
   static class Fsck extends S3GuardTool {
     public static final String CHECK_FLAG = "check";
+    public static final String DDB_MS_CONSISTENCY_FLAG = "internal";

     public static final String NAME = "fsck";
     public static final String PURPOSE = "Compares S3 with MetadataStore, and "
@@ -1553,10 +1554,12 @@ static class Fsck extends S3GuardTool {
         "\t" + PURPOSE + "\n\n" +
         "Common options:\n" +
         "  -" + CHECK_FLAG + " Check the metadata store for errors, but do "
-            + "not fix any issues.\n";
+            + "not fix any issues.\n" +
+        "  -" + DDB_MS_CONSISTENCY_FLAG + " Check the dynamodb metadata store "
+            + "for internal consistency.\n";

     Fsck(Configuration conf) {
-      super(conf, CHECK_FLAG);
+      super(conf, CHECK_FLAG, DDB_MS_CONSISTENCY_FLAG);
     }

     @Override
@@ -1578,6 +1581,21 @@ public int run(String[] args, PrintStream out) throws
       }
       int exitValue = EXIT_SUCCESS;

+      final CommandFormat commandFormat = getCommandFormat();
+
+      // check if there's more than one argument
+      int flags = 0;
+      if (commandFormat.getOpt(CHECK_FLAG)) {
+        flags++;
+      }
+      if (commandFormat.getOpt(DDB_MS_CONSISTENCY_FLAG)) {
+        flags++;
+      }
+      if (flags > 1) {
+        out.println(USAGE);
+        throw invalidArgs("There should be only one parameter used for checking.");
+      }
+
       String s3Path = paths.get(0);
       try {
         initS3AFileSystem(s3Path);
@@ -1600,25 +1618,25 @@ public int run(String[] args, PrintStream out) throws

       if (ms == null ||
           !(ms instanceof DynamoDBMetadataStore)) {
-        errorln(s3Path + " path uses MS: " + ms);
+        errorln(s3Path + " path uses metadata store: " + ms);
         errorln(NAME + " can be only used with a DynamoDB backed s3a bucket.");
         errorln(USAGE);
         return ERROR;
       }

-      final CommandFormat commandFormat = getCommandFormat();
+      List<S3GuardFsck.ComparePair> violations;
+
       if (commandFormat.getOpt(CHECK_FLAG)) {
         // do the check
         S3GuardFsck s3GuardFsck = new S3GuardFsck(fs, ms);
         try {
-          final List<S3GuardFsck.ComparePair> comparePairs
-              = s3GuardFsck.compareS3ToMs(fs.qualify(root));
-          if (comparePairs.size() > 0) {
-            exitValue = EXIT_FAIL;
-          }
+          violations = s3GuardFsck.compareS3ToMs(fs.qualify(root));
         } catch (IOException e) {
           throw e;
         }
+      } else if (commandFormat.getOpt(DDB_MS_CONSISTENCY_FLAG)) {
+        S3GuardFsck s3GuardFsck = new S3GuardFsck(fs, ms);
+        violations = s3GuardFsck.checkDdbInternalConsistency(fs.qualify(root));
       } else {
         errorln("No supported operation is selected.");
         errorln(USAGE);
@@ -1626,6 +1644,12 @@ public int run(String[] args, PrintStream out) throws
       }

       out.flush();
+
+      // We fail if there were compare pairs, as the returned compare pairs
+      // contain issues.
+      if (violations == null || violations.size() > 0) {
+        exitValue = EXIT_FAIL;
+      }
       return exitValue;
     }
   }
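The exit-value handling above makes a failed check visible to calling scripts. As a usage sketch complementing the documentation changes below (the bucket and prefix are placeholders; the only behaviour relied on is that a run which finds violations returns a non-zero exit status):

```bash
# Sketch: run the internal DynamoDB consistency check and react to the
# exit status (non-zero when the check reported violations).
if ! hadoop s3guard fsck -internal s3a://example-bucket/path_prefix/; then
  echo "s3guard fsck found metadata store inconsistencies; check the ERROR logs"
fi
```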
@@ -923,10 +923,47 @@ Metadata Store Diagnostics:
 table={ ... }
 write-capacity=20
 ```

 *Note*: There is a limit to how many times in a 24 hour period the capacity
 of a bucket can be changed, either through this command or the AWS console.

+### Check the consistency of the metadata store, `s3guard fsck`
+
+Compares S3 with MetadataStore, and returns a failure status if any
+rules or invariants are violated. Only works with DynamoDB metadata stores.
+
+```bash
+hadoop s3guard fsck [-check | -internal] (s3a://BUCKET | s3a://PATH_PREFIX)
+```
+
+The `-check` operation checks the metadata store from the S3 perspective, but
+does not fix any issues.
+The consistency issues found are logged at the ERROR log level.
+
+The `-internal` operation checks the internal consistency of the metadata
+store, but does not fix any issues.
+The errors found are logged at the ERROR log level.
+
+*Note*: the `-check` and `-internal` operations can be used only as separate
+commands. Running `fsck` with both will result in an error.
+
+Examples:
+
+```bash
+hadoop s3guard fsck -check s3a://ireland-1/path_prefix/
+```
+
+Checks the metadata store while iterating through the S3 bucket.
+The path_prefix will be used as the root element of the check.
+
+```bash
+hadoop s3guard fsck -internal s3a://ireland-1/path_prefix/
+```
+
+Checks the internal consistency of the metadata store.
+The path_prefix will be used as the root element of the check.
+
 ## Debugging and Error Handling

 If you run into network connectivity issues, or have a machine failure in the
@@ -435,10 +435,126 @@ public void testTombstonedInMsNotDeletedInS3() throws Exception {

       assertComparePairsSize(comparePairs, 1);
-      // check fil1 that there's the violation
+
+      // check if the violation is there
       checkForViolationInPairs(file, comparePairs,
           S3GuardFsck.Violation.TOMBSTONED_IN_MS_NOT_DELETED_IN_S3);
-      // check the child that there's no NO_ETAG violation
+    } finally {
+      cleanup(file, cwd);
+    }
+  }
+
+  @Test
+  public void checkDdbInternalConsistency() throws Exception {
+    final S3GuardFsck s3GuardFsck = new S3GuardFsck(rawFs, metadataStore);
+    final DynamoDBMetadataStore ms =
+        (DynamoDBMetadataStore) guardedFs.getMetadataStore();
+    s3GuardFsck.checkDdbInternalConsistency(
+        new Path("s3a://" + guardedFs.getBucket() + "/"));
+  }
+
+  @Test
+  public void testDdbInternalNoLastUpdatedField() throws Exception {
+    final Path cwd = path("/" + getMethodName() + "-" + UUID.randomUUID());
+    final Path file = new Path(cwd, "file");
+    try {
+      final S3AFileStatus s3AFileStatus = new S3AFileStatus(100, 100, file, 100,
+          "test", "etag", "version");
+      final PathMetadata pathMetadata = new PathMetadata(s3AFileStatus);
+      pathMetadata.setLastUpdated(0);
+      metadataStore.put(pathMetadata);
+
+      final S3GuardFsck s3GuardFsck = new S3GuardFsck(rawFs, metadataStore);
+      final List<S3GuardFsck.ComparePair> comparePairs =
+          s3GuardFsck.checkDdbInternalConsistency(cwd);
+
+      assertComparePairsSize(comparePairs, 1);
+
+      // check if the violation is there
+      checkForViolationInPairs(file, comparePairs,
+          S3GuardFsck.Violation.NO_LASTUPDATED_FIELD);
+    } finally {
+      cleanup(file, cwd);
+    }
+  }
+
+  @Test
+  public void testDdbInternalOrphanEntry() throws Exception {
+    final Path cwd = path("/" + getMethodName() + "-" + UUID.randomUUID());
+    final Path parentDir = new Path(cwd, "directory");
+    final Path file = new Path(parentDir, "file");
+    try {
+      final S3AFileStatus s3AFileStatus = new S3AFileStatus(100, 100, file, 100,
+          "test", "etag", "version");
+      final PathMetadata pathMetadata = new PathMetadata(s3AFileStatus);
+      pathMetadata.setLastUpdated(1000);
+      metadataStore.put(pathMetadata);
+      metadataStore.forgetMetadata(parentDir);
+
+      final S3GuardFsck s3GuardFsck = new S3GuardFsck(rawFs, metadataStore);
+      final List<S3GuardFsck.ComparePair> comparePairs =
+          s3GuardFsck.checkDdbInternalConsistency(cwd);
+
+      // check if the violation is there
+      assertComparePairsSize(comparePairs, 1);
+      checkForViolationInPairs(file, comparePairs,
+          S3GuardFsck.Violation.ORPHAN_DDB_ENTRY);
+    } finally {
+      cleanup(file, cwd);
+    }
+  }
+
+  @Test
+  public void testDdbInternalParentIsAFile() throws Exception {
+    final Path cwd = path("/" + getMethodName() + "-" + UUID.randomUUID());
+    final Path parentDir = new Path(cwd, "directory");
+    final Path file = new Path(parentDir, "file");
+    try {
+      final S3AFileStatus s3AFileStatus = new S3AFileStatus(100, 100, file, 100,
+          "test", "etag", "version");
+      final PathMetadata pathMetadata = new PathMetadata(s3AFileStatus);
+      pathMetadata.setLastUpdated(1000);
+      metadataStore.put(pathMetadata);
+
+      final S3AFileStatus dirAsFile = MetadataStoreTestBase
+          .basicFileStatus(parentDir, 1, false, 1);
+      final PathMetadata dirAsFilePm = new PathMetadata(dirAsFile);
+      dirAsFilePm.setLastUpdated(100);
+      metadataStore.put(dirAsFilePm);
+
+      final S3GuardFsck s3GuardFsck = new S3GuardFsck(rawFs, metadataStore);
+      final List<S3GuardFsck.ComparePair> comparePairs =
+          s3GuardFsck.checkDdbInternalConsistency(cwd);
+
+      // check if the violation is there
+      assertComparePairsSize(comparePairs, 1);
+      checkForViolationInPairs(file, comparePairs,
+          S3GuardFsck.Violation.PARENT_IS_A_FILE);
+    } finally {
+      cleanup(file, cwd);
+    }
+  }
+
+  @Test
+  public void testDdbInternalParentTombstoned() throws Exception {
+    final Path cwd = path("/" + getMethodName() + "-" + UUID.randomUUID());
+    final Path parentDir = new Path(cwd, "directory");
+    final Path file = new Path(parentDir, "file");
+    try {
+      final S3AFileStatus s3AFileStatus = new S3AFileStatus(100, 100, file, 100,
+          "test", "etag", "version");
+      final PathMetadata pathMetadata = new PathMetadata(s3AFileStatus);
+      pathMetadata.setLastUpdated(1000);
+      metadataStore.put(pathMetadata);
+      metadataStore.delete(parentDir, null);
+
+      final S3GuardFsck s3GuardFsck = new S3GuardFsck(rawFs, metadataStore);
+      final List<S3GuardFsck.ComparePair> comparePairs =
+          s3GuardFsck.checkDdbInternalConsistency(cwd);
+
+      // check if the violation is there
+      assertComparePairsSize(comparePairs, 1);
+      checkForViolationInPairs(file, comparePairs,
+          S3GuardFsck.Violation.PARENT_TOMBSTONED);
     } finally {
       cleanup(file, cwd);
     }
@@ -304,11 +304,11 @@ public void testCLIFsckWithoutParam() throws Exception {

   @Test
   public void testCLIFsckWithParam() throws Exception {
+    LOG.info("This test serves the purpose to run fsck with the correct " +
+        "parameters, so there will be no exception thrown.");
     final int result = run(S3GuardTool.Fsck.NAME, "-check",
         "s3a://" + getFileSystem().getBucket());
-    LOG.info("This test serves the purpose to run fsck with the correct " +
-        "parameters, so there will be no exception thrown. " +
-        "The return value of the run: {}", result);
+    LOG.info("The return value of the run: {}", result);
   }

   @Test
@@ -324,4 +324,33 @@ public void testCLIFsckFailInitializeFs() throws Exception {
         () -> run(S3GuardTool.Fsck.NAME, "-check",
             "s3a://this-bucket-does-not-exist-" + UUID.randomUUID()));
   }
+
+  @Test
+  public void testCLIFsckDDbInternalWrongS3APath() throws Exception {
+    intercept(FileNotFoundException.class, "wrong path",
+        () -> run(S3GuardTool.Fsck.NAME, "-" + Fsck.DDB_MS_CONSISTENCY_FLAG,
+            "s3a://" + getFileSystem().getBucket() + "/" + UUID.randomUUID()));
+  }
+
+  @Test
+  public void testCLIFsckDDbInternalParam() throws Exception {
+    describe("This test serves the purpose to run fsck with the correct " +
+        "parameters, so there will be no exception thrown.");
+    final int result = run(S3GuardTool.Fsck.NAME,
+        "-" + Fsck.DDB_MS_CONSISTENCY_FLAG,
+        "s3a://" + getFileSystem().getBucket());
+    LOG.info("The return value of the run: {}", result);
+  }
+
+  @Test
+  public void testCLIFsckCheckExclusive() throws Exception {
+    describe("There should be only one check param when running fsck. " +
+        "If more than one param is passed, the command should fail. " +
+        "This provides an exclusive run for the checks, so the user is able " +
+        "to define the order of checking.");
+    intercept(ExitUtil.ExitException.class, "only one parameter",
+        () -> run(S3GuardTool.Fsck.NAME,
+            "-" + Fsck.DDB_MS_CONSISTENCY_FLAG, "-" + Fsck.CHECK_FLAG,
+            "s3a://" + getFileSystem().getBucket()));
+  }
 }
Loading…
Reference in New Issue
Block a user