From 7b026c50f1be399987d23e06b4ecfbffc51dc7b5 Mon Sep 17 00:00:00 2001
From: Colin Patrick Mccabe
Date: Fri, 31 Oct 2014 13:15:17 -0700
Subject: [PATCH] HDFS-6917. Add an hdfs debug command to validate blocks, call
 recoverlease, etc. (cmccabe)
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt        |   3 +
 .../hadoop-hdfs/src/main/bin/hdfs                  |   4 +
 .../apache/hadoop/hdfs/tools/DebugAdmin.java       | 361 ++++++++++++++++++
 .../hadoop/hdfs/tools/TestDebugAdmin.java          | 118 ++++++
 4 files changed, 486 insertions(+)
 create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java
 create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDebugAdmin.java

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 792839ff72..fe6d08a45e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -324,6 +324,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7035. Make adding a new data directory to the DataNode an atomic
     operation and improve error handling (Lei Xu via Colin P. McCabe)
+
+    HDFS-6917. Add an hdfs debug command to validate blocks, call recoverlease,
+    etc. (cmccabe)
 
   OPTIMIZATIONS
 
   BUG FIXES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
index 67e82f1988..d47df118a0 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
@@ -53,6 +53,7 @@ function hadoop_usage
   echo "  zkfc                 run the ZK Failover Controller daemon"
   echo ""
   echo "Most commands print help when invoked w/o parameters."
+  # There are also debug commands, but they don't show up in this listing.
 }
 
 # let's locate libexec...
@@ -121,6 +122,9 @@ case ${COMMAND} in
       CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
     fi
   ;;
+  debug)
+    CLASS='org.apache.hadoop.hdfs.tools.DebugAdmin'
+  ;;
   dfs)
     CLASS=org.apache.hadoop.fs.FsShell
     hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java
new file mode 100644
index 0000000000..41f1ca4801
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java
@@ -0,0 +1,361 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools;
+
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.util.concurrent.Uninterruptibles;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.DataChecksum;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
+
+/**
+ * This class implements debug operations on the HDFS command-line.
+ *
+ * These operations are only for debugging, and may change or disappear
+ * between HDFS versions.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class DebugAdmin extends Configured implements Tool {
+  /**
+   * All the debug commands we can run.
+   */
+  private DebugCommand DEBUG_COMMANDS[] = {
+    new VerifyBlockChecksumCommand(),
+    new RecoverLeaseCommand(),
+    new HelpCommand()
+  };
+
+  /**
+   * The base class for debug commands.
+   */
+  private abstract class DebugCommand {
+    final String name;
+    final String usageText;
+    final String helpText;
+
+    DebugCommand(String name, String usageText, String helpText) {
+      this.name = name;
+      this.usageText = usageText;
+      this.helpText = helpText;
+    }
+
+    abstract int run(List<String> args) throws IOException;
+  }
+
+  private static int HEADER_LEN = 7;
+
+  /**
+   * The command for verifying a block metadata file and possibly block file.
+   */
+  private class VerifyBlockChecksumCommand extends DebugCommand {
+    VerifyBlockChecksumCommand() {
+      super("verify",
+"verify [-meta <metadata-file>] [-block <block-file>]",
+"  Verify HDFS metadata and block files.  If a block file is specified, we\n" +
+"  will verify that the checksums in the metadata file match the block\n" +
+"  file.");
+    }
+
+    int run(List<String> args) throws IOException {
+      if (args.size() == 0) {
+        System.out.println(usageText);
+        System.out.println(helpText + "\n");
+        return 1;
+      }
+      String blockFile = StringUtils.popOptionWithArgument("-block", args);
+      String metaFile = StringUtils.popOptionWithArgument("-meta", args);
+      if (metaFile == null) {
+        System.err.println("You must specify a meta file with -meta");
+        return 1;
+      }
+
+      FileInputStream metaStream = null, dataStream = null;
+      FileChannel metaChannel = null, dataChannel = null;
+      DataInputStream checksumStream = null;
+      try {
+        BlockMetadataHeader header;
+        try {
+          metaStream = new FileInputStream(metaFile);
+          checksumStream = new DataInputStream(metaStream);
+          header = BlockMetadataHeader.readHeader(checksumStream);
+          metaChannel = metaStream.getChannel();
+          metaChannel.position(HEADER_LEN);
+        } catch (RuntimeException e) {
+          System.err.println("Failed to read HDFS metadata file header for " +
+              metaFile + ": " + StringUtils.stringifyException(e));
+          return 1;
+        } catch (IOException e) {
+          System.err.println("Failed to read HDFS metadata file header for " +
+              metaFile + ": " + StringUtils.stringifyException(e));
+          return 1;
+        }
+        DataChecksum checksum = header.getChecksum();
+        System.out.println("Checksum type: " + checksum.toString());
+        if (blockFile == null) {
+          return 0;
+        }
+        ByteBuffer metaBuf, dataBuf;
+        try {
+          dataStream = new FileInputStream(blockFile);
+          dataChannel = dataStream.getChannel();
+          final int CHECKSUMS_PER_BUF = 1024 * 32;
+          metaBuf = ByteBuffer.allocate(checksum.
+              getChecksumSize() * CHECKSUMS_PER_BUF);
+          dataBuf = ByteBuffer.allocate(checksum.
+              getBytesPerChecksum() * CHECKSUMS_PER_BUF);
+        } catch (IOException e) {
+          System.err.println("Failed to open HDFS block file for " +
+              blockFile + ": " + StringUtils.stringifyException(e));
+          return 1;
+        }
+        long offset = 0;
+        while (true) {
+          dataBuf.clear();
+          int dataRead = -1;
+          try {
+            dataRead = dataChannel.read(dataBuf);
+            if (dataRead < 0) {
+              break;
+            }
+          } catch (IOException e) {
+            System.err.println("Got I/O error reading block file " +
+                blockFile + " from disk at offset " + dataChannel.position() +
+                ": " + StringUtils.stringifyException(e));
+            return 1;
+          }
+          try {
+            int csumToRead =
+                (((checksum.getBytesPerChecksum() - 1) + dataRead) /
+                    checksum.getBytesPerChecksum()) *
+                    checksum.getChecksumSize();
+            metaBuf.clear();
+            metaBuf.limit(csumToRead);
+            metaChannel.read(metaBuf);
+            dataBuf.flip();
+            metaBuf.flip();
+          } catch (IOException e) {
+            System.err.println("Got I/O error reading metadata file " +
+                metaFile + " from disk at offset " + metaChannel.position() +
+                ": " + StringUtils.stringifyException(e));
+            return 1;
+          }
+          try {
+            checksum.verifyChunkedSums(dataBuf, metaBuf,
+                blockFile, offset);
+          } catch (IOException e) {
+            System.out.println("verifyChunkedSums error: " +
+                StringUtils.stringifyException(e));
+            return 1;
+          }
+          offset += dataRead;
+        }
+        System.out.println("Checksum verification succeeded on block file " +
+            blockFile);
+        return 0;
+      } finally {
+        IOUtils.cleanup(null, metaStream, dataStream, checksumStream);
+      }
+    }
+  }
+
+  /**
+   * The command for recovering a file lease.
+   */
+  private class RecoverLeaseCommand extends DebugCommand {
+    RecoverLeaseCommand() {
+      super("recoverLease",
+"recoverLease [-path <path>] [-retries <num-retries>]",
+"  Recover the lease on the specified path.  The path must reside on an\n" +
+"  HDFS filesystem.  The default number of retries is 1.");
+    }
+
+    private static final int TIMEOUT_MS = 5000;
+
+    int run(List<String> args) throws IOException {
+      if (args.size() == 0) {
+        System.out.println(usageText);
+        System.out.println(helpText + "\n");
+        return 1;
+      }
+      String pathStr = StringUtils.popOptionWithArgument("-path", args);
+      String retriesStr = StringUtils.popOptionWithArgument("-retries", args);
+      if (pathStr == null) {
+        System.err.println("You must supply a -path argument to " +
+            "recoverLease.");
+        return 1;
+      }
+      int maxRetries = 1;
+      if (retriesStr != null) {
+        try {
+          maxRetries = Integer.parseInt(retriesStr);
+        } catch (NumberFormatException e) {
+          System.err.println("Failed to parse the argument to -retries: " +
+              StringUtils.stringifyException(e));
+          return 1;
+        }
+      }
+      FileSystem fs;
+      try {
+        fs = FileSystem.newInstance(new URI(pathStr), getConf(), null);
+      } catch (URISyntaxException e) {
+        System.err.println("URISyntaxException for " + pathStr + ": " +
+            StringUtils.stringifyException(e));
+        return 1;
+      } catch (InterruptedException e) {
+        System.err.println("InterruptedException for " + pathStr + ": " +
+            StringUtils.stringifyException(e));
+        return 1;
+      }
+      DistributedFileSystem dfs = null;
+      try {
+        dfs = (DistributedFileSystem) fs;
+      } catch (ClassCastException e) {
+        System.err.println("Invalid filesystem for path " + pathStr + ": " +
+            "needed scheme hdfs, but got: " + fs.getScheme());
+        return 1;
+      }
+      for (int retry = 0; true; ) {
+        boolean recovered = false;
+        IOException ioe = null;
+        try {
+          recovered = dfs.recoverLease(new Path(pathStr));
+        } catch (IOException e) {
+          ioe = e;
+        }
+        if (recovered) {
+          System.out.println("recoverLease SUCCEEDED on " + pathStr);
+          return 0;
+        }
+        if (ioe != null) {
+          System.err.println("recoverLease got exception: ");
+          ioe.printStackTrace();
+        } else {
+          System.err.println("recoverLease returned false.");
+        }
+        retry++;
+        if (retry >= maxRetries) {
+          break;
+        }
+        System.err.println("Retrying in " + TIMEOUT_MS + " ms...");
+        Uninterruptibles.sleepUninterruptibly(TIMEOUT_MS,
+            TimeUnit.MILLISECONDS);
+        System.err.println("Retry #" + retry);
+      }
+      System.err.println("Giving up on recoverLease for " + pathStr +
+          " after " + maxRetries + (maxRetries == 1 ? " try." : " tries."));
+      return 1;
+    }
+  }
+
+  /**
+   * The command for getting help about other commands.
+   */
+  private class HelpCommand extends DebugCommand {
+    HelpCommand() {
+      super("help",
+"help [command-name]",
+"  Get help about a command.");
+    }
+
+    int run(List<String> args) {
+      DebugCommand command = popCommand(args);
+      if (command == null) {
+        printUsage();
+        return 0;
+      }
+      System.out.println(command.usageText);
+      System.out.println(command.helpText + "\n");
+      return 0;
+    }
+  }
+
+  public DebugAdmin(Configuration conf) {
+    super(conf);
+  }
+
+  private DebugCommand popCommand(List<String> args) {
+    String commandStr = (args.size() == 0) ?
"" : args.get(0); + if (commandStr.startsWith("-")) { + commandStr = commandStr.substring(1); + } + for (DebugCommand command : DEBUG_COMMANDS) { + if (command.name.equals(commandStr)) { + args.remove(0); + return command; + } + } + return null; + } + + public int run(String[] argv) { + LinkedList args = new LinkedList(); + for (int j = 0; j < argv.length; ++j) { + args.add(argv[j]); + } + DebugCommand command = popCommand(args); + if (command == null) { + printUsage(); + return 0; + } + try { + return command.run(args); + } catch (IOException e) { + System.err.println("IOException: " + + StringUtils.stringifyException(e)); + return 1; + } catch (RuntimeException e) { + System.err.println("RuntimeException: " + + StringUtils.stringifyException(e)); + return 1; + } + } + + private void printUsage() { + System.out.println("Usage: hdfs debug [arguments]\n"); + for (DebugCommand command : DEBUG_COMMANDS) { + if (!command.name.equals("help")) { + System.out.println(command.usageText); + } + } + } + + public static void main(String[] argsArray) throws IOException { + DebugAdmin debugAdmin = new DebugAdmin(new Configuration()); + System.exit(debugAdmin.run(argsArray)); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDebugAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDebugAdmin.java new file mode 100644 index 0000000000..44b6ba9cde --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDebugAdmin.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hdfs.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
+import org.apache.hadoop.io.IOUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+
+import static org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil.*;
+import static org.junit.Assert.assertEquals;
+
+public class TestDebugAdmin {
+  private MiniDFSCluster cluster;
+  private DistributedFileSystem fs;
+  private DebugAdmin admin;
+  private DataNode datanode;
+
+  @Before
+  public void setUp() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+    cluster.waitActive();
+    fs = cluster.getFileSystem();
+    admin = new DebugAdmin(conf);
+    datanode = cluster.getDataNodes().get(0);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (cluster != null) {
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  private String runCmd(String[] cmd) throws Exception {
+    final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    final PrintStream out = new PrintStream(bytes);
+    final PrintStream oldErr = System.err;
+    final PrintStream oldOut = System.out;
+    System.setErr(out);
+    System.setOut(out);
+    int ret;
+    try {
+      ret = admin.run(cmd);
+    } finally {
+      System.setErr(oldErr);
+      System.setOut(oldOut);
+      IOUtils.closeStream(out);
+    }
+    return "ret: " + ret + ", " + bytes.toString();
+  }
+
+  @Test(timeout = 60000)
+  public void testRecoverLease() throws Exception {
+    assertEquals("ret: 1, You must supply a -path argument to recoverLease.\n",
+        runCmd(new String[]{"recoverLease", "-retries", "1"}));
+    FSDataOutputStream out = fs.create(new Path("/foo"));
+    out.write(123);
+    out.close();
+    assertEquals("ret: 0, recoverLease SUCCEEDED on /foo\n",
+        runCmd(new String[]{"recoverLease", "-path", "/foo"}));
+  }
+
+  @Test(timeout = 60000)
+  public void testVerifyBlockChecksumCommand() throws Exception {
+    DFSTestUtil.createFile(fs, new Path("/bar"), 1234, (short) 1, 0xdeadbeef);
+    FsDatasetSpi<?> fsd = datanode.getFSDataset();
+    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, new Path("/bar"));
+    File blockFile = getBlockFile(fsd,
+        block.getBlockPoolId(), block.getLocalBlock());
+    assertEquals("ret: 1, You must specify a meta file with -meta\n",
+        runCmd(new String[]{"verify", "-block", blockFile.getAbsolutePath()}));
+    File metaFile = getMetaFile(fsd,
+        block.getBlockPoolId(), block.getLocalBlock());
+    assertEquals("ret: 0, Checksum type: " +
+            "DataChecksum(type=CRC32C, chunkSize=512)\n",
+        runCmd(new String[]{"verify",
+            "-meta", metaFile.getAbsolutePath()}));
+    assertEquals("ret: 0, Checksum type: " +
+            "DataChecksum(type=CRC32C, chunkSize=512)\n" +
+            "Checksum verification succeeded on block file " +
+            blockFile.getAbsolutePath() + "\n",
+        runCmd(new String[]{"verify",
+            "-meta", metaFile.getAbsolutePath(),
+            "-block", blockFile.getAbsolutePath()})
+    );
+  }
+}
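
For reference, the new subcommands are dispatched through the updated hdfs script.
A minimal usage sketch, matching the usage strings above (the file and block
paths here are hypothetical placeholders; actual block and metadata filenames
depend on the DataNode's storage directory layout):

  # recover the lease on an open file, retrying up to 3 times
  $ hdfs debug recoverLease -path /tmp/open-file -retries 3

  # verify a block's on-disk checksums against its metadata file
  $ hdfs debug verify -meta /data/dfs/dn/.../blk_1073741825_1001.meta \
        -block /data/dfs/dn/.../blk_1073741825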