From caf7298e49f646a40023af999f62d61846fde5e2 Mon Sep 17 00:00:00 2001
From: Andrew Wang
Date: Wed, 28 Jan 2015 12:36:29 -0800
Subject: [PATCH] HDFS-6673. Add delimited format support to PB OIV tool.
Contributed by Eddy Xu.
---
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 +
hadoop-hdfs-project/hadoop-hdfs/pom.xml | 5 +
.../offlineImageViewer/FSImageLoader.java | 4 +-
.../OfflineImageViewerPB.java | 21 +-
.../PBImageDelimitedTextWriter.java | 132 ++++
.../offlineImageViewer/PBImageTextWriter.java | 586 ++++++++++++++++++
.../TestOfflineImageViewer.java | 54 +-
7 files changed, 799 insertions(+), 5 deletions(-)
create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java
create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 973f2f0bba..4bd2c557bd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -309,6 +309,8 @@ Release 2.7.0 - UNRELEASED
HDFS-7056. Snapshot support for truncate. (Plamen Jeliazkov and shv)
+ HDFS-6673. Add delimited format support to PB OIV tool. (Eddy Xu via wang)
+
IMPROVEMENTS
HDFS-7055. Add tracing to DFSInputStream (cmccabe)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
index bad1792546..d5c1f35e8a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
@@ -198,6 +198,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
test-jar
test
+
+ org.fusesource.leveldbjni
+ leveldbjni-all
+ 1.8
+
org.bouncycastle
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java
index 2f2fa5fa38..fd29106192 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java
@@ -237,7 +237,7 @@ private static byte[][] loadINodeSection(InputStream in)
return inodes;
}
- private static String[] loadStringTable(InputStream in) throws
+ static String[] loadStringTable(InputStream in) throws
IOException {
FsImageProto.StringTableSection s = FsImageProto.StringTableSection
.parseDelimitedFrom(in);
@@ -479,7 +479,7 @@ private long lookup(String path) throws IOException {
}
}
- private long getFileSize(FsImageProto.INodeSection.INodeFile f) {
+ static long getFileSize(FsImageProto.INodeSection.INodeFile f) {
long size = 0;
for (HdfsProtos.BlockProto p : f.getBlocksList()) {
size += p.getNumBytes();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
index 4fce6a3c07..659036691e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
@@ -65,6 +65,10 @@ public class OfflineImageViewerPB {
+ " -step defines the granularity of the distribution. (2MB by default)\n"
+ " * Web: Run a viewer to expose read-only WebHDFS API.\n"
+ " -addr specifies the address to listen. (localhost:5978 by default)\n"
+ + " * Delimited: Generate a text file with all of the elements common\n"
+ + " to both inodes and inodes-under-construction, separated by a\n"
+ + " delimiter. The default delimiter is \\t, though this may be\n"
+ + " changed via the -delimiter argument.\n"
+ "\n"
+ "Required command line arguments:\n"
+ "-i,--inputFile FSImage file to process.\n"
@@ -74,8 +78,12 @@ public class OfflineImageViewerPB {
+ " file exists, it will be overwritten.\n"
+ " (output to stdout by default)\n"
+ "-p,--processor Select which type of processor to apply\n"
- + " against image file. (XML|FileDistribution|Web)\n"
+ + " against image file. (XML|FileDistribution|Web|Delimited)\n"
+ " (Web by default)\n"
+ + "-delimiter Delimiting string to use with Delimited processor\n"
+ + "-t,--temp Use temporary dir to cache intermediate result to generate\n"
+ + " Delimited outputs. If not set, Delimited processor constructs\n"
+ + " the namespace in memory before outputting text."
+ "-h,--help Display usage information and exit\n";
/**
@@ -97,6 +105,8 @@ private static Options buildOptions() {
options.addOption("maxSize", true, "");
options.addOption("step", true, "");
options.addOption("addr", true, "");
+ options.addOption("delimiter", true, "");
+ options.addOption("t", "temp", true, "");
return options;
}
@@ -141,6 +151,9 @@ public static int run(String[] args) throws Exception {
String inputFile = cmd.getOptionValue("i");
String processor = cmd.getOptionValue("p", "Web");
String outputFile = cmd.getOptionValue("o", "-");
+ String delimiter = cmd.getOptionValue("delimiter",
+ PBImageDelimitedTextWriter.DEFAULT_DELIMITER);
+ String tempPath = cmd.getOptionValue("t", "");
Configuration conf = new Configuration();
try (PrintStream out = outputFile.equals("-") ?
@@ -163,6 +176,12 @@ public static int run(String[] args) throws Exception {
viewer.start(inputFile);
}
break;
+ case "Delimited":
+ try (PBImageDelimitedTextWriter writer =
+ new PBImageDelimitedTextWriter(out, delimiter, tempPath)) {
+ writer.visit(new RandomAccessFile(inputFile, "r"));
+ }
+ break;
}
return 0;
} catch (EOFException e) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java
new file mode 100644
index 0000000000..350967d1ff
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+/**
+ * A PBImageDelimitedTextWriter generates a text representation of the PB fsimage,
+ * with each element separated by a delimiter string. All of the elements
+ * common to both inodes and inodes-under-construction are included. When
+ * processing an fsimage with a layout version that did not include an
+ * element, such as AccessTime, the output file will include a column
+ * for the value, but no value will be included.
+ *
+ * Individual block information for each file is not currently included.
+ *
+ * The default delimiter is tab, as this is an unlikely value to be included in
+ * an inode path or other text metadata. The delimiter value can be via the
+ * constructor.
+ */
+public class PBImageDelimitedTextWriter extends PBImageTextWriter {
+ static final String DEFAULT_DELIMITER = "\t";
+ private static final String DATE_FORMAT="yyyy-MM-dd HH:mm";
+ private final SimpleDateFormat dateFormatter =
+ new SimpleDateFormat(DATE_FORMAT);
+
+ private final String delimiter;
+
+ PBImageDelimitedTextWriter(PrintStream out, String delimiter, String tempPath)
+ throws IOException {
+ super(out, tempPath);
+ this.delimiter = delimiter;
+ }
+
+ private String formatDate(long date) {
+ return dateFormatter.format(new Date(date));
+ }
+
+ private void append(StringBuffer buffer, int field) {
+ buffer.append(delimiter);
+ buffer.append(field);
+ }
+
+ private void append(StringBuffer buffer, long field) {
+ buffer.append(delimiter);
+ buffer.append(field);
+ }
+
+ private void append(StringBuffer buffer, String field) {
+ buffer.append(delimiter);
+ buffer.append(field);
+ }
+
+ @Override
+ public String getEntry(String parent, INode inode) {
+ StringBuffer buffer = new StringBuffer();
+ String path = new File(parent, inode.getName().toStringUtf8()).toString();
+ buffer.append(path);
+ PermissionStatus p = null;
+
+ switch (inode.getType()) {
+ case FILE:
+ INodeFile file = inode.getFile();
+ p = getPermission(file.getPermission());
+ append(buffer, file.getReplication());
+ append(buffer, formatDate(file.getModificationTime()));
+ append(buffer, formatDate(file.getAccessTime()));
+ append(buffer, file.getPreferredBlockSize());
+ append(buffer, file.getBlocksCount());
+ append(buffer, FSImageLoader.getFileSize(file));
+ append(buffer, 0); // NS_QUOTA
+ append(buffer, 0); // DS_QUOTA
+ break;
+ case DIRECTORY:
+ INodeDirectory dir = inode.getDirectory();
+ p = getPermission(dir.getPermission());
+ append(buffer, 0); // Replication
+ append(buffer, formatDate(dir.getModificationTime()));
+ append(buffer, formatDate(0)); // Access time.
+ append(buffer, 0); // Block size.
+ append(buffer, 0); // Num blocks.
+ append(buffer, 0); // Num bytes.
+ append(buffer, dir.getNsQuota());
+ append(buffer, dir.getDsQuota());
+ break;
+ case SYMLINK:
+ INodeSymlink s = inode.getSymlink();
+ p = getPermission(s.getPermission());
+ append(buffer, 0); // Replication
+ append(buffer, formatDate(s.getModificationTime()));
+ append(buffer, formatDate(s.getAccessTime()));
+ append(buffer, 0); // Block size.
+ append(buffer, 0); // Num blocks.
+ append(buffer, 0); // Num bytes.
+ append(buffer, 0); // NS_QUOTA
+ append(buffer, 0); // DS_QUOTA
+ break;
+ default:
+ break;
+ }
+ assert p != null;
+ append(buffer, p.getPermission().toString());
+ append(buffer, p.getUserName());
+ append(buffer, p.getGroupName());
+ return buffer.toString();
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java
new file mode 100644
index 0000000000..0da263d87a
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java
@@ -0,0 +1,586 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.io.LimitInputStream;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
+import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
+import org.apache.hadoop.hdfs.server.namenode.INodeId;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.Time;
+import org.fusesource.leveldbjni.JniDBFactory;
+import org.iq80.leveldb.DB;
+import org.iq80.leveldb.Options;
+import org.iq80.leveldb.WriteBatch;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedInputStream;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.io.RandomAccessFile;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This class reads the protobuf-based fsimage and generates text output
+ * for each inode to {@link PBImageTextWriter#out}. The sub-class can override
+ * {@link getEntry()} to generate formatted string for each inode.
+ *
+ * Since protobuf-based fsimage does not guarantee the order of inodes and
+ * directories, PBImageTextWriter runs two-phase scans:
+ *
+ *
+ * - The first phase, PBImageTextWriter scans the INode sections to reads the
+ * filename of each directory. It also scans the INode_Dir sections to loads
+ * the relationships between a directory and its children. It uses these metadata
+ * to build FS namespace and stored in {@link MetadataMap}
+ * - The second phase, PBImageTextWriter re-scans the INode sections. For each
+ * inode, it looks up the path of the parent directory in the {@link MetadataMap},
+ * and generate output.
+ *
+ *
+ * Two various of {@link MetadataMap} are provided. {@link InMemoryMetadataDB}
+ * stores all metadata in memory (O(n) memory) while
+ * {@link LevelDBMetadataMap} stores metadata in LevelDB on disk (O(1) memory).
+ * User can choose between them based on the time/space tradeoffs.
+ */
+abstract class PBImageTextWriter implements Closeable {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(PBImageTextWriter.class);
+
+ /**
+ * This metadata map is used to construct the namespace before generating
+ * text outputs.
+ *
+ * It contains two mapping relationships:
+ *
+ *
It maps each inode (inode Id) to its parent directory (inode Id).
+ * It maps each directory from its inode Id.
+ *
+ */
+ private static interface MetadataMap extends Closeable {
+ /**
+ * Associate an inode with its parent directory.
+ */
+ public void putDirChild(long parentId, long childId) throws IOException;
+
+ /**
+ * Associate a directory with its inode Id.
+ */
+ public void putDir(INode dir) throws IOException;
+
+ /** Get the full path of the parent directory for the given inode. */
+ public String getParentPath(long inode) throws IOException;
+
+ /** Synchronize metadata to persistent storage, if possible */
+ public void sync() throws IOException;
+ }
+
+ /**
+ * Maintain all the metadata in memory.
+ */
+ private static class InMemoryMetadataDB implements MetadataMap {
+ /**
+ * Represent a directory in memory.
+ */
+ private static class Dir {
+ private final long inode;
+ private Dir parent = null;
+ private String name;
+ private String path = null; // cached full path of the directory.
+
+ Dir(long inode, String name) {
+ this.inode = inode;
+ this.name = name;
+ }
+
+ private void setParent(Dir parent) {
+ Preconditions.checkState(this.parent == null);
+ this.parent = parent;
+ }
+
+ /**
+ * Returns the full path of this directory.
+ */
+ private String getPath() {
+ if (this.parent == null) {
+ return "/";
+ }
+ if (this.path == null) {
+ this.path = new File(parent.getPath(), name).toString();
+ this.name = null;
+ }
+ return this.path;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return o instanceof Dir && inode == ((Dir) o).inode;
+ }
+
+ @Override
+ public int hashCode() {
+ return Long.valueOf(inode).hashCode();
+ }
+ }
+
+ /** INode Id to Dir object mapping */
+ private Map dirMap = new HashMap<>();
+
+ /** Children to parent directory INode ID mapping. */
+ private Map dirChildMap = new HashMap<>();
+
+ InMemoryMetadataDB() {
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public void putDirChild(long parentId, long childId) {
+ Dir parent = dirMap.get(parentId);
+ Dir child = dirMap.get(childId);
+ if (child != null) {
+ child.setParent(parent);
+ }
+ Preconditions.checkState(!dirChildMap.containsKey(childId));
+ dirChildMap.put(childId, parent);
+ }
+
+ @Override
+ public void putDir(INode p) {
+ Preconditions.checkState(!dirMap.containsKey(p.getId()));
+ Dir dir = new Dir(p.getId(), p.getName().toStringUtf8());
+ dirMap.put(p.getId(), dir);
+ }
+
+ public String getParentPath(long inode) throws IOException {
+ if (inode == INodeId.ROOT_INODE_ID) {
+ return "";
+ }
+ Dir parent = dirChildMap.get(inode);
+ Preconditions.checkState(parent != null,
+ "Can not find parent directory for INode: %s", inode);
+ return parent.getPath();
+ }
+
+ @Override
+ public void sync() {
+ }
+ }
+
+ /**
+ * A MetadataMap that stores metadata in LevelDB.
+ */
+ private static class LevelDBMetadataMap implements MetadataMap {
+ /**
+ * Store metadata in LevelDB.
+ */
+ private static class LevelDBStore implements Closeable {
+ private DB db = null;
+ private WriteBatch batch = null;
+ private int writeCount = 0;
+ private static final int BATCH_SIZE = 1024;
+
+ LevelDBStore(final File dbPath) throws IOException {
+ Options options = new Options();
+ options.createIfMissing(true);
+ options.errorIfExists(true);
+ db = JniDBFactory.factory.open(dbPath, options);
+ batch = db.createWriteBatch();
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (batch != null) {
+ IOUtils.cleanup(null, batch);
+ batch = null;
+ }
+ IOUtils.cleanup(null, db);
+ db = null;
+ }
+
+ public void put(byte[] key, byte[] value) throws IOException {
+ batch.put(key, value);
+ writeCount++;
+ if (writeCount >= BATCH_SIZE) {
+ sync();
+ }
+ }
+
+ public byte[] get(byte[] key) throws IOException {
+ return db.get(key);
+ }
+
+ public void sync() throws IOException {
+ try {
+ db.write(batch);
+ } finally {
+ batch.close();
+ batch = null;
+ }
+ batch = db.createWriteBatch();
+ writeCount = 0;
+ }
+ }
+
+ /**
+ * A LRU cache for directory path strings.
+ *
+ * The key of this LRU cache is the inode of a directory.
+ */
+ private static class DirPathCache extends LinkedHashMap {
+ private final static int CAPACITY = 16 * 1024;
+
+ DirPathCache() {
+ super(CAPACITY);
+ }
+
+ @Override
+ protected boolean removeEldestEntry(Map.Entry entry) {
+ return super.size() > CAPACITY;
+ }
+ }
+
+ /** Map the child inode to the parent directory inode. */
+ private LevelDBStore dirChildMap = null;
+ /** Directory entry map */
+ private LevelDBStore dirMap = null;
+ private DirPathCache dirPathCache = new DirPathCache();
+
+ LevelDBMetadataMap(String baseDir) throws IOException {
+ File dbDir = new File(baseDir);
+ if (dbDir.exists()) {
+ FileUtils.deleteDirectory(dbDir);
+ }
+ if (!dbDir.mkdirs()) {
+ throw new IOException("Failed to mkdir on " + dbDir);
+ }
+ try {
+ dirChildMap = new LevelDBStore(new File(dbDir, "dirChildMap"));
+ dirMap = new LevelDBStore(new File(dbDir, "dirMap"));
+ } catch (IOException e) {
+ LOG.error("Failed to open LevelDBs", e);
+ IOUtils.cleanup(null, this);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ IOUtils.cleanup(null, dirChildMap, dirMap);
+ dirChildMap = null;
+ dirMap = null;
+ }
+
+ private static byte[] toBytes(long value) {
+ return ByteBuffer.allocate(8).putLong(value).array();
+ }
+
+ private static byte[] toBytes(String value)
+ throws UnsupportedEncodingException {
+ return value.getBytes("UTF-8");
+ }
+
+ private static long toLong(byte[] bytes) {
+ Preconditions.checkArgument(bytes.length == 8);
+ return ByteBuffer.wrap(bytes).getLong();
+ }
+
+ private static String toString(byte[] bytes) throws IOException {
+ try {
+ return new String(bytes, "UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new IOException(e);
+ }
+ }
+
+ @Override
+ public void putDirChild(long parentId, long childId) throws IOException {
+ dirChildMap.put(toBytes(childId), toBytes(parentId));
+ }
+
+ @Override
+ public void putDir(INode dir) throws IOException {
+ Preconditions.checkArgument(dir.hasDirectory(),
+ "INode %s (%s) is not a directory.", dir.getId(), dir.getName());
+ dirMap.put(toBytes(dir.getId()), toBytes(dir.getName().toStringUtf8()));
+ }
+
+ @Override
+ public String getParentPath(long inode) throws IOException {
+ if (inode == INodeId.ROOT_INODE_ID) {
+ return "/";
+ }
+ byte[] bytes = dirChildMap.get(toBytes(inode));
+ Preconditions.checkState(bytes != null && bytes.length == 8,
+ "Can not find parent directory for inode %s, "
+ + "fsimage might be corrupted", inode);
+ long parent = toLong(bytes);
+ if (!dirPathCache.containsKey(parent)) {
+ bytes = dirMap.get(toBytes(parent));
+ if (parent != INodeId.ROOT_INODE_ID) {
+ Preconditions.checkState(bytes != null,
+ "Can not find parent directory for inode %s, "
+ + ", the fsimage might be corrupted.", parent);
+ }
+ String parentName = toString(bytes);
+ String parentPath =
+ new File(getParentPath(parent), parentName).toString();
+ dirPathCache.put(parent, parentPath);
+ }
+ return dirPathCache.get(parent);
+ }
+
+ @Override
+ public void sync() throws IOException {
+ dirChildMap.sync();
+ dirMap.sync();
+ }
+ }
+
+ private String[] stringTable;
+ private PrintStream out;
+ private MetadataMap metadataMap = null;
+
+ /**
+ * Construct a PB FsImage writer to generate text file.
+ * @param out the writer to output text information of fsimage.
+ * @param tempPath the path to store metadata. If it is empty, store metadata
+ * in memory instead.
+ */
+ PBImageTextWriter(PrintStream out, String tempPath) throws IOException {
+ this.out = out;
+ if (tempPath.isEmpty()) {
+ metadataMap = new InMemoryMetadataDB();
+ } else {
+ metadataMap = new LevelDBMetadataMap(tempPath);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ IOUtils.cleanup(null, metadataMap);
+ }
+
+ /**
+ * Get text output for the given inode.
+ * @param parent the path of parent directory
+ * @param inode the INode object to output.
+ */
+ abstract protected String getEntry(String parent, INode inode);
+
+ public void visit(RandomAccessFile file) throws IOException {
+ Configuration conf = new Configuration();
+ if (!FSImageUtil.checkFileFormat(file)) {
+ throw new IOException("Unrecognized FSImage");
+ }
+
+ FileSummary summary = FSImageUtil.loadSummary(file);
+
+ try (FileInputStream fin = new FileInputStream(file.getFD())) {
+ InputStream is;
+ ArrayList sections =
+ Lists.newArrayList(summary.getSectionsList());
+ Collections.sort(sections,
+ new Comparator() {
+ @Override
+ public int compare(FsImageProto.FileSummary.Section s1,
+ FsImageProto.FileSummary.Section s2) {
+ FSImageFormatProtobuf.SectionName n1 =
+ FSImageFormatProtobuf.SectionName.fromString(s1.getName());
+ FSImageFormatProtobuf.SectionName n2 =
+ FSImageFormatProtobuf.SectionName.fromString(s2.getName());
+ if (n1 == null) {
+ return n2 == null ? 0 : -1;
+ } else if (n2 == null) {
+ return -1;
+ } else {
+ return n1.ordinal() - n2.ordinal();
+ }
+ }
+ });
+
+ for (FileSummary.Section section : sections) {
+ fin.getChannel().position(section.getOffset());
+ is = FSImageUtil.wrapInputStreamForCompression(conf,
+ summary.getCodec(), new BufferedInputStream(new LimitInputStream(
+ fin, section.getLength())));
+ switch (SectionName.fromString(section.getName())) {
+ case STRING_TABLE:
+ stringTable = FSImageLoader.loadStringTable(is);
+ break;
+ default:
+ break;
+ }
+ }
+
+ loadDirectories(fin, sections, summary, conf);
+ loadINodeDirSection(fin, sections, summary, conf);
+ metadataMap.sync();
+ output(conf, summary, fin, sections);
+ }
+ }
+
+ private void output(Configuration conf, FileSummary summary,
+ FileInputStream fin, ArrayList sections)
+ throws IOException {
+ InputStream is;
+ long startTime = Time.monotonicNow();
+ for (FileSummary.Section section : sections) {
+ if (SectionName.fromString(section.getName()) == SectionName.INODE) {
+ fin.getChannel().position(section.getOffset());
+ is = FSImageUtil.wrapInputStreamForCompression(conf,
+ summary.getCodec(), new BufferedInputStream(new LimitInputStream(
+ fin, section.getLength())));
+ outputINodes(is);
+ }
+ }
+ long timeTaken = Time.monotonicNow() - startTime;
+ LOG.debug("Time to output inodes: {}ms", timeTaken);
+ }
+
+ protected PermissionStatus getPermission(long perm) {
+ return FSImageFormatPBINode.Loader.loadPermission(perm, stringTable);
+ }
+
+ /** Load the directories in the INode section. */
+ private void loadDirectories(
+ FileInputStream fin, List sections,
+ FileSummary summary, Configuration conf)
+ throws IOException {
+ LOG.info("Loading directories");
+ long startTime = Time.monotonicNow();
+ for (FileSummary.Section section : sections) {
+ if (SectionName.fromString(section.getName())
+ == SectionName.INODE) {
+ fin.getChannel().position(section.getOffset());
+ InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
+ summary.getCodec(), new BufferedInputStream(new LimitInputStream(
+ fin, section.getLength())));
+ loadDirectoriesInINodeSection(is);
+ }
+ }
+ long timeTaken = Time.monotonicNow() - startTime;
+ LOG.info("Finished loading directories in {}ms", timeTaken);
+ }
+
+ private void loadINodeDirSection(
+ FileInputStream fin, List sections,
+ FileSummary summary, Configuration conf)
+ throws IOException {
+ LOG.info("Loading INode directory section.");
+ long startTime = Time.monotonicNow();
+ for (FileSummary.Section section : sections) {
+ if (SectionName.fromString(section.getName())
+ == SectionName.INODE_DIR) {
+ fin.getChannel().position(section.getOffset());
+ InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
+ summary.getCodec(), new BufferedInputStream(
+ new LimitInputStream(fin, section.getLength())));
+ buildNamespace(is);
+ }
+ }
+ long timeTaken = Time.monotonicNow() - startTime;
+ LOG.info("Finished loading INode directory section in {}ms", timeTaken);
+ }
+
+ /**
+ * Load the filenames of the directories from the INode section.
+ */
+ private void loadDirectoriesInINodeSection(InputStream in) throws IOException {
+ INodeSection s = INodeSection.parseDelimitedFrom(in);
+ LOG.info("Loading directories in INode section.");
+ int numDirs = 0;
+ for (int i = 0; i < s.getNumInodes(); ++i) {
+ INode p = INode.parseDelimitedFrom(in);
+ if (LOG.isDebugEnabled() && i % 10000 == 0) {
+ LOG.debug("Scanned {} inodes.", i);
+ }
+ if (p.hasDirectory()) {
+ metadataMap.putDir(p);
+ numDirs++;
+ }
+ }
+ LOG.info("Found {} directories in INode section.", numDirs);
+ }
+
+ /**
+ * Scan the INodeDirectory section to construct the namespace.
+ */
+ private void buildNamespace(InputStream in) throws IOException {
+ int count = 0;
+ while (true) {
+ FsImageProto.INodeDirectorySection.DirEntry e =
+ FsImageProto.INodeDirectorySection.DirEntry.parseDelimitedFrom(in);
+ if (e == null) {
+ break;
+ }
+ count++;
+ if (LOG.isDebugEnabled() && count % 10000 == 0) {
+ LOG.debug("Scanned {} directories.", count);
+ }
+ long parentId = e.getParent();
+ // Referred INode is not support for now.
+ for (int i = 0; i < e.getChildrenCount(); i++) {
+ long childId = e.getChildren(i);
+ metadataMap.putDirChild(parentId, childId);
+ }
+ Preconditions.checkState(e.getRefChildrenCount() == 0);
+ }
+ LOG.info("Scanned {} INode directories to build namespace.", count);
+ }
+
+ private void outputINodes(InputStream in) throws IOException {
+ INodeSection s = INodeSection.parseDelimitedFrom(in);
+ LOG.info("Found {} INodes in the INode section", s.getNumInodes());
+ for (int i = 0; i < s.getNumInodes(); ++i) {
+ INode p = INode.parseDelimitedFrom(in);
+ String parentPath = metadataMap.getParentPath(p.getId());
+ out.println(getEntry(parentPath, p));
+
+ if (LOG.isDebugEnabled() && i % 100000 == 0) {
+ LOG.debug("Outputted {} INodes.", i);
+ }
+ }
+ LOG.info("Outputted {} INodes.", s.getNumInodes());
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
index 4bb2b79593..a8d1d54450 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
@@ -20,22 +20,26 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.InputStreamReader;
import java.io.PrintStream;
-import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.io.StringReader;
-import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -51,6 +55,7 @@
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileSystemTestHelper;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -329,6 +334,51 @@ public void testWebImageViewer() throws Exception {
}
}
+ @Test
+ public void testPBDelimitedWriter() throws IOException, InterruptedException {
+ testPBDelimitedWriter(""); // Test in memory db.
+ testPBDelimitedWriter(
+ new FileSystemTestHelper().getTestRootDir() + "/delimited.db");
+ }
+
+ private void testPBDelimitedWriter(String db)
+ throws IOException, InterruptedException {
+ final String DELIMITER = "\t";
+ ByteArrayOutputStream output = new ByteArrayOutputStream();
+
+ try (PrintStream o = new PrintStream(output)) {
+ PBImageDelimitedTextWriter v =
+ new PBImageDelimitedTextWriter(o, DELIMITER, db);
+ v.visit(new RandomAccessFile(originalFsimage, "r"));
+ }
+
+ Set fileNames = new HashSet<>();
+ try (
+ ByteArrayInputStream input =
+ new ByteArrayInputStream(output.toByteArray());
+ BufferedReader reader =
+ new BufferedReader(new InputStreamReader(input))) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ System.out.println(line);
+ String[] fields = line.split(DELIMITER);
+ assertEquals(12, fields.length);
+ fileNames.add(fields[0]);
+ }
+ }
+
+ // writtenFiles does not contain root directory and "invalid XML char" dir.
+ for (Iterator it = fileNames.iterator(); it.hasNext(); ) {
+ String filename = it.next();
+ if (filename.startsWith("/dirContainingInvalidXMLChar")) {
+ it.remove();
+ } else if (filename.equals("/")) {
+ it.remove();
+ }
+ }
+ assertEquals(writtenFiles.keySet(), fileNames);
+ }
+
private static void compareFile(FileStatus expected, FileStatus status) {
assertEquals(expected.getAccessTime(), status.getAccessTime());
assertEquals(expected.getBlockSize(), status.getBlockSize());