From f96fb05a2b889f3acdfae60d8f64c755ff48b8c1 Mon Sep 17 00:00:00 2001 From: Yiqun Lin Date: Wed, 3 Apr 2019 14:01:30 +0800 Subject: [PATCH] HDDS-1365. Fix error handling in KeyValueContainerCheck. Contributed by Supratim Deka. --- .../keyvalue/KeyValueContainerCheck.java | 285 +++++------------- .../keyvalue/TestKeyValueContainerCheck.java | 11 +- 2 files changed, 81 insertions(+), 215 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java index b1ab1e1af8..bdfdf21b4c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java @@ -68,9 +68,26 @@ public KeyValueContainerCheck(String metadataPath, Configuration conf, } /** - * fast checks are basic and do not look inside the metadata files. - * Or into the structures on disk. These checks can be done on Open - * containers as well without concurrency implications + * Run basic integrity checks on container metadata. + * These checks do not look inside the metadata files. + * Applicable for OPEN containers. + * + * @return true : corruption detected, false : no corruption. + */ + public boolean fastCheck() { + boolean corruption = false; + try { + basicChecks(); + + } catch (IOException e) { + handleCorruption(e); + corruption = true; + } + + return corruption; + } + + /** * Checks : * 1. check directory layout * 2. check container file @@ -78,24 +95,14 @@ public KeyValueContainerCheck(String metadataPath, Configuration conf, * @return void */ - public KvCheckError fastCheck() { + private void basicChecks() throws IOException { - KvCheckError error; - LOG.trace("Running fast check for container {};", containerID); + LOG.trace("Running basic checks for container {};", containerID); - error = loadContainerData(); - if (error != KvCheckError.ERROR_NONE) { - return error; - } + loadContainerData(); - error = checkLayout(); - if (error != KvCheckError.ERROR_NONE) { - return error; - } - - error = checkContainerFile(); - - return error; + checkLayout(); + checkContainerFile(); } /** @@ -107,129 +114,80 @@ public KvCheckError fastCheck() { *

* fullCheck is a superset of fastCheck * - * @return void + * @return true : corruption detected, false : no corruption. */ - public KvCheckError fullCheck() { - /** + public boolean fullCheck() { + boolean corruption = false; - */ - KvCheckError error; + try { + basicChecks(); + checkBlockDB(); - error = fastCheck(); - if (error != KvCheckError.ERROR_NONE) { - - LOG.trace("fastCheck failed, aborting full check for Container {}", - containerID); - return error; + } catch (IOException e) { + handleCorruption(e); + corruption = true; } - error = checkBlockDB(); - - return error; + return corruption; } /** * Check the integrity of the directory structure of the container. - * - * @return error code or ERROR_NONE */ - private KvCheckError checkLayout() { - boolean success; - KvCheckError error = KvCheckError.ERROR_NONE; + private void checkLayout() throws IOException { // is metadataPath accessible as a directory? - try { - checkDirPath(metadataPath); - } catch (IOException ie) { - error = KvCheckError.METADATA_PATH_ACCESS; - handleCorruption(ie.getMessage(), error, ie); - return error; - } + checkDirPath(metadataPath); - String chunksPath = onDiskContainerData.getChunksPath(); // is chunksPath accessible as a directory? - try { - checkDirPath(chunksPath); - } catch (IOException ie) { - error = KvCheckError.CHUNKS_PATH_ACCESS; - handleCorruption(ie.getMessage(), error, ie); - return error; - } - - return error; + String chunksPath = onDiskContainerData.getChunksPath(); + checkDirPath(chunksPath); } private void checkDirPath(String path) throws IOException { File dirPath = new File(path); String errStr = null; - boolean success = true; try { if (!dirPath.isDirectory()) { - success = false; errStr = "Not a directory [" + path + "]"; + throw new IOException(errStr); } } catch (SecurityException se) { throw new IOException("Security exception checking dir [" + path + "]", se); - } catch (Exception e) { - throw new IOException("Generic exception checking dir [" - + path + "]", e); } - try { - String[] ls = dirPath.list(); - if (ls == null) { - // null result implies operation failed - success = false; - errStr = "null listing for directory [" + path + "]"; - } - } catch (Exception e) { - throw new IOException("Exception listing dir [" + path + "]", e); - } - - if (!success) { - Preconditions.checkState(errStr != null); + String[] ls = dirPath.list(); + if (ls == null) { + // null result implies operation failed + errStr = "null listing for directory [" + path + "]"; throw new IOException(errStr); } } - private KvCheckError checkContainerFile() { + private void checkContainerFile() throws IOException { /** * compare the values in the container file loaded from disk, * with the values we are expecting */ - KvCheckError error = KvCheckError.ERROR_NONE; String dbType; Preconditions .checkState(onDiskContainerData != null, "Container File not loaded"); - KvCheckAction next; - try { - ContainerUtils.verifyChecksum(onDiskContainerData); - } catch (Exception e) { - error = KvCheckError.CONTAINERDATA_CKSUM; - handleCorruption("Container File Checksum mismatch", error, e); - return error; - } + ContainerUtils.verifyChecksum(onDiskContainerData); if (onDiskContainerData.getContainerType() != ContainerProtos.ContainerType.KeyValueContainer) { String errStr = "Bad Container type in Containerdata for " + containerID; - error = KvCheckError.CONTAINERDATA_TYPE; - handleCorruption(errStr, error, null); - return error; // Abort if we do not know the type of Container + throw new IOException(errStr); } if (onDiskContainerData.getContainerID() != containerID) { String errStr = "Bad ContainerID field in Containerdata for " + containerID; - error = KvCheckError.CONTAINERDATA_ID; - next = handleCorruption(errStr, error, null); - if (next == KvCheckAction.ABORT) { - return error; - } // else continue checking other data elements + throw new IOException(errStr); } dbType = onDiskContainerData.getContainerDBType(); @@ -237,9 +195,7 @@ private KvCheckError checkContainerFile() { !dbType.equals(OZONE_METADATA_STORE_IMPL_LEVELDB)) { String errStr = "Unknown DBType [" + dbType + "] in Container File for [" + containerID + "]"; - error = KvCheckError.CONTAINERDATA_DBTYPE; - handleCorruption(errStr, error, null); - return error; + throw new IOException(errStr); } KeyValueContainerData kvData = onDiskContainerData; @@ -248,17 +204,11 @@ private KvCheckError checkContainerFile() { "Bad metadata path in Containerdata for " + containerID + "Expected [" + metadataPath.toString() + "] Got [" + kvData.getMetadataPath() + "]"; - error = KvCheckError.CONTAINERDATA_METADATA_PATH; - next = handleCorruption(errStr, error, null); - if (next == KvCheckAction.ABORT) { - return error; - } + throw new IOException(errStr); } - - return error; } - private KvCheckError checkBlockDB() { + private void checkBlockDB() throws IOException { /** * Check the integrity of the DB inside each container. * In Scope: @@ -269,52 +219,31 @@ private KvCheckError checkBlockDB() { * 1. chunk checksum verification. this is left to a separate * slow chunk scanner */ - KvCheckError error; Preconditions.checkState(onDiskContainerData != null, "invoke loadContainerData prior to calling this function"); File dbFile; File metaDir = new File(metadataPath); - try { - dbFile = KeyValueContainerLocationUtil - .getContainerDBFile(metaDir, containerID); + dbFile = KeyValueContainerLocationUtil + .getContainerDBFile(metaDir, containerID); - if (!dbFile.exists() || !dbFile.canRead()) { - - String dbFileErrorMsg = "Unable to access DB File [" + dbFile.toString() - + "] for Container [" + containerID + "] metadata path [" - + metadataPath + "]"; - error = KvCheckError.DB_ACCESS; - handleCorruption(dbFileErrorMsg, error, null); - return error; - } - } catch (Exception e) { - String dbFileErrorMessage = - "Exception when initializing DBFile" + "with metadatapath [" - + metadataPath + "] for Container [" + containerID - + "]"; - error = KvCheckError.DB_ACCESS; - handleCorruption(dbFileErrorMessage, error, e); - return error; + if (!dbFile.exists() || !dbFile.canRead()) { + String dbFileErrorMsg = "Unable to access DB File [" + dbFile.toString() + + "] for Container [" + containerID + "] metadata path [" + + metadataPath + "]"; + throw new IOException(dbFileErrorMsg); } + + onDiskContainerData.setDbFile(dbFile); + MetadataStore db = BlockUtils + .getDB(onDiskContainerData, checkConfig); - try { - MetadataStore db = BlockUtils - .getDB(onDiskContainerData, checkConfig); - error = iterateBlockDB(db); - } catch (Exception e) { - error = KvCheckError.DB_ITERATOR; - handleCorruption("Block DB Iterator aborted", error, e); - return error; - } - - return error; + iterateBlockDB(db); } - private KvCheckError iterateBlockDB(MetadataStore db) + private void iterateBlockDB(MetadataStore db) throws IOException { - KvCheckError error = KvCheckError.ERROR_NONE; Preconditions.checkState(db != null); // get "normal" keys from the Block DB @@ -328,103 +257,39 @@ private KvCheckError iterateBlockDB(MetadataStore db) List chunkInfoList = block.getChunks(); for (ContainerProtos.ChunkInfo chunk : chunkInfoList) { File chunkFile; - try { - chunkFile = ChunkUtils - .getChunkFile(onDiskContainerData, - ChunkInfo.getFromProtoBuf(chunk)); - } catch (Exception e) { - error = KvCheckError.MISSING_CHUNK_FILE; - handleCorruption("Unable to access chunk path", error, e); - return error; - } + chunkFile = ChunkUtils.getChunkFile(onDiskContainerData, + ChunkInfo.getFromProtoBuf(chunk)); if (!chunkFile.exists()) { - error = KvCheckError.MISSING_CHUNK_FILE; - // concurrent mutation in Block DB? lookup the block again. byte[] bdata = db.get( Longs.toByteArray(block.getBlockID().getLocalID())); if (bdata == null) { LOG.trace("concurrency with delete, ignoring deleted block"); - error = KvCheckError.ERROR_NONE; break; // skip to next block from kvIter } else { - handleCorruption("Missing chunk file", error, null); - return error; + String errorStr = "Missing chunk file " + + chunkFile.getAbsolutePath(); + throw new IOException(errorStr); } } } } - - return error; } - private KvCheckError loadContainerData() { - KvCheckError error = KvCheckError.ERROR_NONE; + private void loadContainerData() throws IOException { File containerFile = KeyValueContainer .getContainerFile(metadataPath.toString(), containerID); - try { - onDiskContainerData = (KeyValueContainerData) ContainerDataYaml - .readContainerFile(containerFile); - } catch (IOException e) { - error = KvCheckError.FILE_LOAD; - handleCorruption("Unable to load Container File", error, e); - } - - return error; + onDiskContainerData = (KeyValueContainerData) ContainerDataYaml + .readContainerFile(containerFile); } - private KvCheckAction handleCorruption(String reason, - KvCheckError error, Exception e) { - - // XXX HDDS-1201 need to implement corruption handling/reporting - + private void handleCorruption(IOException e) { String errStr = - "Corruption detected in container: [" + containerID + "] reason: [" - + reason + "] error code: [" + error + "]"; - String logMessage = null; - - StackTraceElement[] stackeElems = Thread.currentThread().getStackTrace(); - String caller = - "Corruption reported from Source File: [" + stackeElems[2].getFileName() - + "] Line: [" + stackeElems[2].getLineNumber() + "]"; - - if (e != null) { - logMessage = errStr + " exception: [" + e.getMessage() + "]"; - e.printStackTrace(); - } else { - logMessage = errStr; - } - - LOG.error(caller); + "Corruption detected in container: [" + containerID + "] "; + String logMessage = errStr + "Exception: [" + e.getMessage() + "]"; LOG.error(logMessage); - - return KvCheckAction.ABORT; - } - - /** - * Pre-defined error codes for Container Metadata check. - */ - public enum KvCheckError { - ERROR_NONE, - FILE_LOAD, // unable to load container metafile - METADATA_PATH_ACCESS, // metadata path is not accessible - CHUNKS_PATH_ACCESS, // chunks path is not accessible - CONTAINERDATA_ID, // bad Container-ID stored in Container file - CONTAINERDATA_METADATA_PATH, // bad metadata path in Container file - CONTAINERDATA_CHUNKS_PATH, // bad chunks path in Container file - CONTAINERDATA_CKSUM, // container file checksum mismatch - CONTAINERDATA_TYPE, // container file incorrect type of Container - CONTAINERDATA_DBTYPE, // unknown DB Type specified in Container File - DB_ACCESS, // unable to load Metastore DB - DB_ITERATOR, // unable to create block iterator for Metastore DB - MISSING_CHUNK_FILE // chunk file not found - } - - private enum KvCheckAction { - CONTINUE, // Continue with remaining checks on the corrupt Container - ABORT // Abort checks for the container } } \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java index 3395214895..0bc1bbc387 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java @@ -55,6 +55,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_ROCKSDB; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; /** @@ -100,7 +101,7 @@ public TestKeyValueContainerCheck(String metadataImpl) { int deletedBlocks = 1; int normalBlocks = 3; int chunksPerBlock = 4; - KeyValueContainerCheck.KvCheckError error; + boolean corruption = false; // test Closed Container createContainerWithBlocks(containerID, normalBlocks, deletedBlocks, 65536, @@ -114,14 +115,14 @@ public TestKeyValueContainerCheck(String metadataImpl) { containerID); // first run checks on a Open Container - error = kvCheck.fastCheck(); - assertTrue(error == KeyValueContainerCheck.KvCheckError.ERROR_NONE); + corruption = kvCheck.fastCheck(); + assertFalse(corruption); container.close(); // next run checks on a Closed Container - error = kvCheck.fullCheck(); - assertTrue(error == KeyValueContainerCheck.KvCheckError.ERROR_NONE); + corruption = kvCheck.fullCheck(); + assertFalse(corruption); } /**