HDFS-6673. Add delimited format support to PB OIV tool. Contributed by Eddy Xu.

This commit is contained in:
Andrew Wang 2015-01-28 12:36:29 -08:00
parent 9850e1584b
commit caf7298e49
7 changed files with 799 additions and 5 deletions

View File

@ -309,6 +309,8 @@ Release 2.7.0 - UNRELEASED
HDFS-7056. Snapshot support for truncate. (Plamen Jeliazkov and shv)
HDFS-6673. Add delimited format support to PB OIV tool. (Eddy Xu via wang)
IMPROVEMENTS
HDFS-7055. Add tracing to DFSInputStream (cmccabe)

View File

@ -198,6 +198,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.fusesource.leveldbjni</groupId>
<artifactId>leveldbjni-all</artifactId>
<version>1.8</version>
</dependency>
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
<dependency>
<groupId>org.bouncycastle</groupId>

View File

@ -237,7 +237,7 @@ private static byte[][] loadINodeSection(InputStream in)
return inodes;
}
private static String[] loadStringTable(InputStream in) throws
static String[] loadStringTable(InputStream in) throws
IOException {
FsImageProto.StringTableSection s = FsImageProto.StringTableSection
.parseDelimitedFrom(in);
@ -479,7 +479,7 @@ private long lookup(String path) throws IOException {
}
}
private long getFileSize(FsImageProto.INodeSection.INodeFile f) {
static long getFileSize(FsImageProto.INodeSection.INodeFile f) {
long size = 0;
for (HdfsProtos.BlockProto p : f.getBlocksList()) {
size += p.getNumBytes();

View File

@ -65,6 +65,10 @@ public class OfflineImageViewerPB {
+ " -step defines the granularity of the distribution. (2MB by default)\n"
+ " * Web: Run a viewer to expose read-only WebHDFS API.\n"
+ " -addr specifies the address to listen. (localhost:5978 by default)\n"
+ " * Delimited: Generate a text file with all of the elements common\n"
+ " to both inodes and inodes-under-construction, separated by a\n"
+ " delimiter. The default delimiter is \\t, though this may be\n"
+ " changed via the -delimiter argument.\n"
+ "\n"
+ "Required command line arguments:\n"
+ "-i,--inputFile <arg> FSImage file to process.\n"
@ -74,8 +78,12 @@ public class OfflineImageViewerPB {
+ " file exists, it will be overwritten.\n"
+ " (output to stdout by default)\n"
+ "-p,--processor <arg> Select which type of processor to apply\n"
+ " against image file. (XML|FileDistribution|Web)\n"
+ " against image file. (XML|FileDistribution|Web|Delimited)\n"
+ " (Web by default)\n"
+ "-delimiter <arg> Delimiting string to use with Delimited processor\n"
+ "-t,--temp <arg> Use temporary dir to cache intermediate result to generate\n"
+ " Delimited outputs. If not set, Delimited processor constructs\n"
+ " the namespace in memory before outputting text."
+ "-h,--help Display usage information and exit\n";
/**
@ -97,6 +105,8 @@ private static Options buildOptions() {
options.addOption("maxSize", true, "");
options.addOption("step", true, "");
options.addOption("addr", true, "");
options.addOption("delimiter", true, "");
options.addOption("t", "temp", true, "");
return options;
}
@ -141,6 +151,9 @@ public static int run(String[] args) throws Exception {
String inputFile = cmd.getOptionValue("i");
String processor = cmd.getOptionValue("p", "Web");
String outputFile = cmd.getOptionValue("o", "-");
String delimiter = cmd.getOptionValue("delimiter",
PBImageDelimitedTextWriter.DEFAULT_DELIMITER);
String tempPath = cmd.getOptionValue("t", "");
Configuration conf = new Configuration();
try (PrintStream out = outputFile.equals("-") ?
@ -163,6 +176,12 @@ public static int run(String[] args) throws Exception {
viewer.start(inputFile);
}
break;
case "Delimited":
try (PBImageDelimitedTextWriter writer =
new PBImageDelimitedTextWriter(out, delimiter, tempPath)) {
writer.visit(new RandomAccessFile(inputFile, "r"));
}
break;
}
return 0;
} catch (EOFException e) {

View File

@ -0,0 +1,132 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
* A PBImageDelimitedTextWriter generates a text representation of the PB fsimage,
* with each element separated by a delimiter string. All of the elements
* common to both inodes and inodes-under-construction are included. When
* processing an fsimage with a layout version that did not include an
* element, such as AccessTime, the output file will include a column
* for the value, but no value will be included.
*
* Individual block information for each file is not currently included.
*
* The default delimiter is tab, as this is an unlikely value to be included in
* an inode path or other text metadata. The delimiter value can be via the
* constructor.
*/
public class PBImageDelimitedTextWriter extends PBImageTextWriter {
static final String DEFAULT_DELIMITER = "\t";
private static final String DATE_FORMAT="yyyy-MM-dd HH:mm";
private final SimpleDateFormat dateFormatter =
new SimpleDateFormat(DATE_FORMAT);
private final String delimiter;
PBImageDelimitedTextWriter(PrintStream out, String delimiter, String tempPath)
throws IOException {
super(out, tempPath);
this.delimiter = delimiter;
}
private String formatDate(long date) {
return dateFormatter.format(new Date(date));
}
private void append(StringBuffer buffer, int field) {
buffer.append(delimiter);
buffer.append(field);
}
private void append(StringBuffer buffer, long field) {
buffer.append(delimiter);
buffer.append(field);
}
private void append(StringBuffer buffer, String field) {
buffer.append(delimiter);
buffer.append(field);
}
@Override
public String getEntry(String parent, INode inode) {
StringBuffer buffer = new StringBuffer();
String path = new File(parent, inode.getName().toStringUtf8()).toString();
buffer.append(path);
PermissionStatus p = null;
switch (inode.getType()) {
case FILE:
INodeFile file = inode.getFile();
p = getPermission(file.getPermission());
append(buffer, file.getReplication());
append(buffer, formatDate(file.getModificationTime()));
append(buffer, formatDate(file.getAccessTime()));
append(buffer, file.getPreferredBlockSize());
append(buffer, file.getBlocksCount());
append(buffer, FSImageLoader.getFileSize(file));
append(buffer, 0); // NS_QUOTA
append(buffer, 0); // DS_QUOTA
break;
case DIRECTORY:
INodeDirectory dir = inode.getDirectory();
p = getPermission(dir.getPermission());
append(buffer, 0); // Replication
append(buffer, formatDate(dir.getModificationTime()));
append(buffer, formatDate(0)); // Access time.
append(buffer, 0); // Block size.
append(buffer, 0); // Num blocks.
append(buffer, 0); // Num bytes.
append(buffer, dir.getNsQuota());
append(buffer, dir.getDsQuota());
break;
case SYMLINK:
INodeSymlink s = inode.getSymlink();
p = getPermission(s.getPermission());
append(buffer, 0); // Replication
append(buffer, formatDate(s.getModificationTime()));
append(buffer, formatDate(s.getAccessTime()));
append(buffer, 0); // Block size.
append(buffer, 0); // Num blocks.
append(buffer, 0); // Num bytes.
append(buffer, 0); // NS_QUOTA
append(buffer, 0); // DS_QUOTA
break;
default:
break;
}
assert p != null;
append(buffer, p.getPermission().toString());
append(buffer, p.getUserName());
append(buffer, p.getGroupName());
return buffer.toString();
}
}

View File

@ -0,0 +1,586 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.io.LimitInputStream;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
import org.apache.hadoop.hdfs.server.namenode.INodeId;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Time;
import org.fusesource.leveldbjni.JniDBFactory;
import org.iq80.leveldb.DB;
import org.iq80.leveldb.Options;
import org.iq80.leveldb.WriteBatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
* This class reads the protobuf-based fsimage and generates text output
* for each inode to {@link PBImageTextWriter#out}. The sub-class can override
* {@link getEntry()} to generate formatted string for each inode.
*
* Since protobuf-based fsimage does not guarantee the order of inodes and
* directories, PBImageTextWriter runs two-phase scans:
*
* <ol>
* <li>The first phase, PBImageTextWriter scans the INode sections to reads the
* filename of each directory. It also scans the INode_Dir sections to loads
* the relationships between a directory and its children. It uses these metadata
* to build FS namespace and stored in {@link MetadataMap}</li>
* <li>The second phase, PBImageTextWriter re-scans the INode sections. For each
* inode, it looks up the path of the parent directory in the {@link MetadataMap},
* and generate output.</li>
* </ol>
*
* Two various of {@link MetadataMap} are provided. {@link InMemoryMetadataDB}
* stores all metadata in memory (O(n) memory) while
* {@link LevelDBMetadataMap} stores metadata in LevelDB on disk (O(1) memory).
* User can choose between them based on the time/space tradeoffs.
*/
abstract class PBImageTextWriter implements Closeable {
private static final Logger LOG =
LoggerFactory.getLogger(PBImageTextWriter.class);
/**
* This metadata map is used to construct the namespace before generating
* text outputs.
*
* It contains two mapping relationships:
* <p>
* <li>It maps each inode (inode Id) to its parent directory (inode Id).</li>
* <li>It maps each directory from its inode Id.</li>
* </p>
*/
private static interface MetadataMap extends Closeable {
/**
* Associate an inode with its parent directory.
*/
public void putDirChild(long parentId, long childId) throws IOException;
/**
* Associate a directory with its inode Id.
*/
public void putDir(INode dir) throws IOException;
/** Get the full path of the parent directory for the given inode. */
public String getParentPath(long inode) throws IOException;
/** Synchronize metadata to persistent storage, if possible */
public void sync() throws IOException;
}
/**
* Maintain all the metadata in memory.
*/
private static class InMemoryMetadataDB implements MetadataMap {
/**
* Represent a directory in memory.
*/
private static class Dir {
private final long inode;
private Dir parent = null;
private String name;
private String path = null; // cached full path of the directory.
Dir(long inode, String name) {
this.inode = inode;
this.name = name;
}
private void setParent(Dir parent) {
Preconditions.checkState(this.parent == null);
this.parent = parent;
}
/**
* Returns the full path of this directory.
*/
private String getPath() {
if (this.parent == null) {
return "/";
}
if (this.path == null) {
this.path = new File(parent.getPath(), name).toString();
this.name = null;
}
return this.path;
}
@Override
public boolean equals(Object o) {
return o instanceof Dir && inode == ((Dir) o).inode;
}
@Override
public int hashCode() {
return Long.valueOf(inode).hashCode();
}
}
/** INode Id to Dir object mapping */
private Map<Long, Dir> dirMap = new HashMap<>();
/** Children to parent directory INode ID mapping. */
private Map<Long, Dir> dirChildMap = new HashMap<>();
InMemoryMetadataDB() {
}
@Override
public void close() throws IOException {
}
@Override
public void putDirChild(long parentId, long childId) {
Dir parent = dirMap.get(parentId);
Dir child = dirMap.get(childId);
if (child != null) {
child.setParent(parent);
}
Preconditions.checkState(!dirChildMap.containsKey(childId));
dirChildMap.put(childId, parent);
}
@Override
public void putDir(INode p) {
Preconditions.checkState(!dirMap.containsKey(p.getId()));
Dir dir = new Dir(p.getId(), p.getName().toStringUtf8());
dirMap.put(p.getId(), dir);
}
public String getParentPath(long inode) throws IOException {
if (inode == INodeId.ROOT_INODE_ID) {
return "";
}
Dir parent = dirChildMap.get(inode);
Preconditions.checkState(parent != null,
"Can not find parent directory for INode: %s", inode);
return parent.getPath();
}
@Override
public void sync() {
}
}
/**
* A MetadataMap that stores metadata in LevelDB.
*/
private static class LevelDBMetadataMap implements MetadataMap {
/**
* Store metadata in LevelDB.
*/
private static class LevelDBStore implements Closeable {
private DB db = null;
private WriteBatch batch = null;
private int writeCount = 0;
private static final int BATCH_SIZE = 1024;
LevelDBStore(final File dbPath) throws IOException {
Options options = new Options();
options.createIfMissing(true);
options.errorIfExists(true);
db = JniDBFactory.factory.open(dbPath, options);
batch = db.createWriteBatch();
}
@Override
public void close() throws IOException {
if (batch != null) {
IOUtils.cleanup(null, batch);
batch = null;
}
IOUtils.cleanup(null, db);
db = null;
}
public void put(byte[] key, byte[] value) throws IOException {
batch.put(key, value);
writeCount++;
if (writeCount >= BATCH_SIZE) {
sync();
}
}
public byte[] get(byte[] key) throws IOException {
return db.get(key);
}
public void sync() throws IOException {
try {
db.write(batch);
} finally {
batch.close();
batch = null;
}
batch = db.createWriteBatch();
writeCount = 0;
}
}
/**
* A LRU cache for directory path strings.
*
* The key of this LRU cache is the inode of a directory.
*/
private static class DirPathCache extends LinkedHashMap<Long, String> {
private final static int CAPACITY = 16 * 1024;
DirPathCache() {
super(CAPACITY);
}
@Override
protected boolean removeEldestEntry(Map.Entry<Long, String> entry) {
return super.size() > CAPACITY;
}
}
/** Map the child inode to the parent directory inode. */
private LevelDBStore dirChildMap = null;
/** Directory entry map */
private LevelDBStore dirMap = null;
private DirPathCache dirPathCache = new DirPathCache();
LevelDBMetadataMap(String baseDir) throws IOException {
File dbDir = new File(baseDir);
if (dbDir.exists()) {
FileUtils.deleteDirectory(dbDir);
}
if (!dbDir.mkdirs()) {
throw new IOException("Failed to mkdir on " + dbDir);
}
try {
dirChildMap = new LevelDBStore(new File(dbDir, "dirChildMap"));
dirMap = new LevelDBStore(new File(dbDir, "dirMap"));
} catch (IOException e) {
LOG.error("Failed to open LevelDBs", e);
IOUtils.cleanup(null, this);
}
}
@Override
public void close() throws IOException {
IOUtils.cleanup(null, dirChildMap, dirMap);
dirChildMap = null;
dirMap = null;
}
private static byte[] toBytes(long value) {
return ByteBuffer.allocate(8).putLong(value).array();
}
private static byte[] toBytes(String value)
throws UnsupportedEncodingException {
return value.getBytes("UTF-8");
}
private static long toLong(byte[] bytes) {
Preconditions.checkArgument(bytes.length == 8);
return ByteBuffer.wrap(bytes).getLong();
}
private static String toString(byte[] bytes) throws IOException {
try {
return new String(bytes, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new IOException(e);
}
}
@Override
public void putDirChild(long parentId, long childId) throws IOException {
dirChildMap.put(toBytes(childId), toBytes(parentId));
}
@Override
public void putDir(INode dir) throws IOException {
Preconditions.checkArgument(dir.hasDirectory(),
"INode %s (%s) is not a directory.", dir.getId(), dir.getName());
dirMap.put(toBytes(dir.getId()), toBytes(dir.getName().toStringUtf8()));
}
@Override
public String getParentPath(long inode) throws IOException {
if (inode == INodeId.ROOT_INODE_ID) {
return "/";
}
byte[] bytes = dirChildMap.get(toBytes(inode));
Preconditions.checkState(bytes != null && bytes.length == 8,
"Can not find parent directory for inode %s, "
+ "fsimage might be corrupted", inode);
long parent = toLong(bytes);
if (!dirPathCache.containsKey(parent)) {
bytes = dirMap.get(toBytes(parent));
if (parent != INodeId.ROOT_INODE_ID) {
Preconditions.checkState(bytes != null,
"Can not find parent directory for inode %s, "
+ ", the fsimage might be corrupted.", parent);
}
String parentName = toString(bytes);
String parentPath =
new File(getParentPath(parent), parentName).toString();
dirPathCache.put(parent, parentPath);
}
return dirPathCache.get(parent);
}
@Override
public void sync() throws IOException {
dirChildMap.sync();
dirMap.sync();
}
}
private String[] stringTable;
private PrintStream out;
private MetadataMap metadataMap = null;
/**
* Construct a PB FsImage writer to generate text file.
* @param out the writer to output text information of fsimage.
* @param tempPath the path to store metadata. If it is empty, store metadata
* in memory instead.
*/
PBImageTextWriter(PrintStream out, String tempPath) throws IOException {
this.out = out;
if (tempPath.isEmpty()) {
metadataMap = new InMemoryMetadataDB();
} else {
metadataMap = new LevelDBMetadataMap(tempPath);
}
}
@Override
public void close() throws IOException {
IOUtils.cleanup(null, metadataMap);
}
/**
* Get text output for the given inode.
* @param parent the path of parent directory
* @param inode the INode object to output.
*/
abstract protected String getEntry(String parent, INode inode);
public void visit(RandomAccessFile file) throws IOException {
Configuration conf = new Configuration();
if (!FSImageUtil.checkFileFormat(file)) {
throw new IOException("Unrecognized FSImage");
}
FileSummary summary = FSImageUtil.loadSummary(file);
try (FileInputStream fin = new FileInputStream(file.getFD())) {
InputStream is;
ArrayList<FileSummary.Section> sections =
Lists.newArrayList(summary.getSectionsList());
Collections.sort(sections,
new Comparator<FileSummary.Section>() {
@Override
public int compare(FsImageProto.FileSummary.Section s1,
FsImageProto.FileSummary.Section s2) {
FSImageFormatProtobuf.SectionName n1 =
FSImageFormatProtobuf.SectionName.fromString(s1.getName());
FSImageFormatProtobuf.SectionName n2 =
FSImageFormatProtobuf.SectionName.fromString(s2.getName());
if (n1 == null) {
return n2 == null ? 0 : -1;
} else if (n2 == null) {
return -1;
} else {
return n1.ordinal() - n2.ordinal();
}
}
});
for (FileSummary.Section section : sections) {
fin.getChannel().position(section.getOffset());
is = FSImageUtil.wrapInputStreamForCompression(conf,
summary.getCodec(), new BufferedInputStream(new LimitInputStream(
fin, section.getLength())));
switch (SectionName.fromString(section.getName())) {
case STRING_TABLE:
stringTable = FSImageLoader.loadStringTable(is);
break;
default:
break;
}
}
loadDirectories(fin, sections, summary, conf);
loadINodeDirSection(fin, sections, summary, conf);
metadataMap.sync();
output(conf, summary, fin, sections);
}
}
private void output(Configuration conf, FileSummary summary,
FileInputStream fin, ArrayList<FileSummary.Section> sections)
throws IOException {
InputStream is;
long startTime = Time.monotonicNow();
for (FileSummary.Section section : sections) {
if (SectionName.fromString(section.getName()) == SectionName.INODE) {
fin.getChannel().position(section.getOffset());
is = FSImageUtil.wrapInputStreamForCompression(conf,
summary.getCodec(), new BufferedInputStream(new LimitInputStream(
fin, section.getLength())));
outputINodes(is);
}
}
long timeTaken = Time.monotonicNow() - startTime;
LOG.debug("Time to output inodes: {}ms", timeTaken);
}
protected PermissionStatus getPermission(long perm) {
return FSImageFormatPBINode.Loader.loadPermission(perm, stringTable);
}
/** Load the directories in the INode section. */
private void loadDirectories(
FileInputStream fin, List<FileSummary.Section> sections,
FileSummary summary, Configuration conf)
throws IOException {
LOG.info("Loading directories");
long startTime = Time.monotonicNow();
for (FileSummary.Section section : sections) {
if (SectionName.fromString(section.getName())
== SectionName.INODE) {
fin.getChannel().position(section.getOffset());
InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
summary.getCodec(), new BufferedInputStream(new LimitInputStream(
fin, section.getLength())));
loadDirectoriesInINodeSection(is);
}
}
long timeTaken = Time.monotonicNow() - startTime;
LOG.info("Finished loading directories in {}ms", timeTaken);
}
private void loadINodeDirSection(
FileInputStream fin, List<FileSummary.Section> sections,
FileSummary summary, Configuration conf)
throws IOException {
LOG.info("Loading INode directory section.");
long startTime = Time.monotonicNow();
for (FileSummary.Section section : sections) {
if (SectionName.fromString(section.getName())
== SectionName.INODE_DIR) {
fin.getChannel().position(section.getOffset());
InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
summary.getCodec(), new BufferedInputStream(
new LimitInputStream(fin, section.getLength())));
buildNamespace(is);
}
}
long timeTaken = Time.monotonicNow() - startTime;
LOG.info("Finished loading INode directory section in {}ms", timeTaken);
}
/**
* Load the filenames of the directories from the INode section.
*/
private void loadDirectoriesInINodeSection(InputStream in) throws IOException {
INodeSection s = INodeSection.parseDelimitedFrom(in);
LOG.info("Loading directories in INode section.");
int numDirs = 0;
for (int i = 0; i < s.getNumInodes(); ++i) {
INode p = INode.parseDelimitedFrom(in);
if (LOG.isDebugEnabled() && i % 10000 == 0) {
LOG.debug("Scanned {} inodes.", i);
}
if (p.hasDirectory()) {
metadataMap.putDir(p);
numDirs++;
}
}
LOG.info("Found {} directories in INode section.", numDirs);
}
/**
* Scan the INodeDirectory section to construct the namespace.
*/
private void buildNamespace(InputStream in) throws IOException {
int count = 0;
while (true) {
FsImageProto.INodeDirectorySection.DirEntry e =
FsImageProto.INodeDirectorySection.DirEntry.parseDelimitedFrom(in);
if (e == null) {
break;
}
count++;
if (LOG.isDebugEnabled() && count % 10000 == 0) {
LOG.debug("Scanned {} directories.", count);
}
long parentId = e.getParent();
// Referred INode is not support for now.
for (int i = 0; i < e.getChildrenCount(); i++) {
long childId = e.getChildren(i);
metadataMap.putDirChild(parentId, childId);
}
Preconditions.checkState(e.getRefChildrenCount() == 0);
}
LOG.info("Scanned {} INode directories to build namespace.", count);
}
private void outputINodes(InputStream in) throws IOException {
INodeSection s = INodeSection.parseDelimitedFrom(in);
LOG.info("Found {} INodes in the INode section", s.getNumInodes());
for (int i = 0; i < s.getNumInodes(); ++i) {
INode p = INode.parseDelimitedFrom(in);
String parentPath = metadataMap.getParentPath(p.getId());
out.println(getEntry(parentPath, p));
if (LOG.isDebugEnabled() && i % 100000 == 0) {
LOG.debug("Outputted {} INodes.", i);
}
}
LOG.info("Outputted {} INodes.", s.getNumInodes());
}
}

View File

@ -20,22 +20,26 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -51,6 +55,7 @@
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystemTestHelper;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
@ -329,6 +334,51 @@ public void testWebImageViewer() throws Exception {
}
}
@Test
public void testPBDelimitedWriter() throws IOException, InterruptedException {
testPBDelimitedWriter(""); // Test in memory db.
testPBDelimitedWriter(
new FileSystemTestHelper().getTestRootDir() + "/delimited.db");
}
private void testPBDelimitedWriter(String db)
throws IOException, InterruptedException {
final String DELIMITER = "\t";
ByteArrayOutputStream output = new ByteArrayOutputStream();
try (PrintStream o = new PrintStream(output)) {
PBImageDelimitedTextWriter v =
new PBImageDelimitedTextWriter(o, DELIMITER, db);
v.visit(new RandomAccessFile(originalFsimage, "r"));
}
Set<String> fileNames = new HashSet<>();
try (
ByteArrayInputStream input =
new ByteArrayInputStream(output.toByteArray());
BufferedReader reader =
new BufferedReader(new InputStreamReader(input))) {
String line;
while ((line = reader.readLine()) != null) {
System.out.println(line);
String[] fields = line.split(DELIMITER);
assertEquals(12, fields.length);
fileNames.add(fields[0]);
}
}
// writtenFiles does not contain root directory and "invalid XML char" dir.
for (Iterator<String> it = fileNames.iterator(); it.hasNext(); ) {
String filename = it.next();
if (filename.startsWith("/dirContainingInvalidXMLChar")) {
it.remove();
} else if (filename.equals("/")) {
it.remove();
}
}
assertEquals(writtenFiles.keySet(), fileNames);
}
private static void compareFile(FileStatus expected, FileStatus status) {
assertEquals(expected.getAccessTime(), status.getAccessTime());
assertEquals(expected.getBlockSize(), status.getBlockSize());