HDFS-6673. Add delimited format support to PB OIV tool. Contributed by Eddy Xu.
This commit is contained in:
parent
9850e1584b
commit
caf7298e49
@ -309,6 +309,8 @@ Release 2.7.0 - UNRELEASED
|
||||
|
||||
HDFS-7056. Snapshot support for truncate. (Plamen Jeliazkov and shv)
|
||||
|
||||
HDFS-6673. Add delimited format support to PB OIV tool. (Eddy Xu via wang)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
HDFS-7055. Add tracing to DFSInputStream (cmccabe)
|
||||
|
@ -198,6 +198,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.fusesource.leveldbjni</groupId>
|
||||
<artifactId>leveldbjni-all</artifactId>
|
||||
<version>1.8</version>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.bouncycastle</groupId>
|
||||
|
@ -237,7 +237,7 @@ private static byte[][] loadINodeSection(InputStream in)
|
||||
return inodes;
|
||||
}
|
||||
|
||||
private static String[] loadStringTable(InputStream in) throws
|
||||
static String[] loadStringTable(InputStream in) throws
|
||||
IOException {
|
||||
FsImageProto.StringTableSection s = FsImageProto.StringTableSection
|
||||
.parseDelimitedFrom(in);
|
||||
@ -479,7 +479,7 @@ private long lookup(String path) throws IOException {
|
||||
}
|
||||
}
|
||||
|
||||
private long getFileSize(FsImageProto.INodeSection.INodeFile f) {
|
||||
static long getFileSize(FsImageProto.INodeSection.INodeFile f) {
|
||||
long size = 0;
|
||||
for (HdfsProtos.BlockProto p : f.getBlocksList()) {
|
||||
size += p.getNumBytes();
|
||||
|
@ -65,6 +65,10 @@ public class OfflineImageViewerPB {
|
||||
+ " -step defines the granularity of the distribution. (2MB by default)\n"
|
||||
+ " * Web: Run a viewer to expose read-only WebHDFS API.\n"
|
||||
+ " -addr specifies the address to listen. (localhost:5978 by default)\n"
|
||||
+ " * Delimited: Generate a text file with all of the elements common\n"
|
||||
+ " to both inodes and inodes-under-construction, separated by a\n"
|
||||
+ " delimiter. The default delimiter is \\t, though this may be\n"
|
||||
+ " changed via the -delimiter argument.\n"
|
||||
+ "\n"
|
||||
+ "Required command line arguments:\n"
|
||||
+ "-i,--inputFile <arg> FSImage file to process.\n"
|
||||
@ -74,8 +78,12 @@ public class OfflineImageViewerPB {
|
||||
+ " file exists, it will be overwritten.\n"
|
||||
+ " (output to stdout by default)\n"
|
||||
+ "-p,--processor <arg> Select which type of processor to apply\n"
|
||||
+ " against image file. (XML|FileDistribution|Web)\n"
|
||||
+ " against image file. (XML|FileDistribution|Web|Delimited)\n"
|
||||
+ " (Web by default)\n"
|
||||
+ "-delimiter <arg> Delimiting string to use with Delimited processor\n"
|
||||
+ "-t,--temp <arg> Use temporary dir to cache intermediate result to generate\n"
|
||||
+ " Delimited outputs. If not set, Delimited processor constructs\n"
|
||||
+ " the namespace in memory before outputting text."
|
||||
+ "-h,--help Display usage information and exit\n";
|
||||
|
||||
/**
|
||||
@ -97,6 +105,8 @@ private static Options buildOptions() {
|
||||
options.addOption("maxSize", true, "");
|
||||
options.addOption("step", true, "");
|
||||
options.addOption("addr", true, "");
|
||||
options.addOption("delimiter", true, "");
|
||||
options.addOption("t", "temp", true, "");
|
||||
|
||||
return options;
|
||||
}
|
||||
@ -141,6 +151,9 @@ public static int run(String[] args) throws Exception {
|
||||
String inputFile = cmd.getOptionValue("i");
|
||||
String processor = cmd.getOptionValue("p", "Web");
|
||||
String outputFile = cmd.getOptionValue("o", "-");
|
||||
String delimiter = cmd.getOptionValue("delimiter",
|
||||
PBImageDelimitedTextWriter.DEFAULT_DELIMITER);
|
||||
String tempPath = cmd.getOptionValue("t", "");
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
try (PrintStream out = outputFile.equals("-") ?
|
||||
@ -163,6 +176,12 @@ public static int run(String[] args) throws Exception {
|
||||
viewer.start(inputFile);
|
||||
}
|
||||
break;
|
||||
case "Delimited":
|
||||
try (PBImageDelimitedTextWriter writer =
|
||||
new PBImageDelimitedTextWriter(out, delimiter, tempPath)) {
|
||||
writer.visit(new RandomAccessFile(inputFile, "r"));
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
} catch (EOFException e) {
|
||||
|
@ -0,0 +1,132 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
|
||||
|
||||
import org.apache.hadoop.fs.permission.PermissionStatus;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* A PBImageDelimitedTextWriter generates a text representation of the PB fsimage,
|
||||
* with each element separated by a delimiter string. All of the elements
|
||||
* common to both inodes and inodes-under-construction are included. When
|
||||
* processing an fsimage with a layout version that did not include an
|
||||
* element, such as AccessTime, the output file will include a column
|
||||
* for the value, but no value will be included.
|
||||
*
|
||||
* Individual block information for each file is not currently included.
|
||||
*
|
||||
* The default delimiter is tab, as this is an unlikely value to be included in
|
||||
* an inode path or other text metadata. The delimiter value can be via the
|
||||
* constructor.
|
||||
*/
|
||||
public class PBImageDelimitedTextWriter extends PBImageTextWriter {
|
||||
static final String DEFAULT_DELIMITER = "\t";
|
||||
private static final String DATE_FORMAT="yyyy-MM-dd HH:mm";
|
||||
private final SimpleDateFormat dateFormatter =
|
||||
new SimpleDateFormat(DATE_FORMAT);
|
||||
|
||||
private final String delimiter;
|
||||
|
||||
PBImageDelimitedTextWriter(PrintStream out, String delimiter, String tempPath)
|
||||
throws IOException {
|
||||
super(out, tempPath);
|
||||
this.delimiter = delimiter;
|
||||
}
|
||||
|
||||
private String formatDate(long date) {
|
||||
return dateFormatter.format(new Date(date));
|
||||
}
|
||||
|
||||
private void append(StringBuffer buffer, int field) {
|
||||
buffer.append(delimiter);
|
||||
buffer.append(field);
|
||||
}
|
||||
|
||||
private void append(StringBuffer buffer, long field) {
|
||||
buffer.append(delimiter);
|
||||
buffer.append(field);
|
||||
}
|
||||
|
||||
private void append(StringBuffer buffer, String field) {
|
||||
buffer.append(delimiter);
|
||||
buffer.append(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getEntry(String parent, INode inode) {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
String path = new File(parent, inode.getName().toStringUtf8()).toString();
|
||||
buffer.append(path);
|
||||
PermissionStatus p = null;
|
||||
|
||||
switch (inode.getType()) {
|
||||
case FILE:
|
||||
INodeFile file = inode.getFile();
|
||||
p = getPermission(file.getPermission());
|
||||
append(buffer, file.getReplication());
|
||||
append(buffer, formatDate(file.getModificationTime()));
|
||||
append(buffer, formatDate(file.getAccessTime()));
|
||||
append(buffer, file.getPreferredBlockSize());
|
||||
append(buffer, file.getBlocksCount());
|
||||
append(buffer, FSImageLoader.getFileSize(file));
|
||||
append(buffer, 0); // NS_QUOTA
|
||||
append(buffer, 0); // DS_QUOTA
|
||||
break;
|
||||
case DIRECTORY:
|
||||
INodeDirectory dir = inode.getDirectory();
|
||||
p = getPermission(dir.getPermission());
|
||||
append(buffer, 0); // Replication
|
||||
append(buffer, formatDate(dir.getModificationTime()));
|
||||
append(buffer, formatDate(0)); // Access time.
|
||||
append(buffer, 0); // Block size.
|
||||
append(buffer, 0); // Num blocks.
|
||||
append(buffer, 0); // Num bytes.
|
||||
append(buffer, dir.getNsQuota());
|
||||
append(buffer, dir.getDsQuota());
|
||||
break;
|
||||
case SYMLINK:
|
||||
INodeSymlink s = inode.getSymlink();
|
||||
p = getPermission(s.getPermission());
|
||||
append(buffer, 0); // Replication
|
||||
append(buffer, formatDate(s.getModificationTime()));
|
||||
append(buffer, formatDate(s.getAccessTime()));
|
||||
append(buffer, 0); // Block size.
|
||||
append(buffer, 0); // Num blocks.
|
||||
append(buffer, 0); // Num bytes.
|
||||
append(buffer, 0); // NS_QUOTA
|
||||
append(buffer, 0); // DS_QUOTA
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
assert p != null;
|
||||
append(buffer, p.getPermission().toString());
|
||||
append(buffer, p.getUserName());
|
||||
append(buffer, p.getGroupName());
|
||||
return buffer.toString();
|
||||
}
|
||||
}
|
@ -0,0 +1,586 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.io.LimitInputStream;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.permission.PermissionStatus;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FsImageProto;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.INodeId;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.fusesource.leveldbjni.JniDBFactory;
|
||||
import org.iq80.leveldb.DB;
|
||||
import org.iq80.leveldb.Options;
|
||||
import org.iq80.leveldb.WriteBatch;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* This class reads the protobuf-based fsimage and generates text output
|
||||
* for each inode to {@link PBImageTextWriter#out}. The sub-class can override
|
||||
* {@link getEntry()} to generate formatted string for each inode.
|
||||
*
|
||||
* Since protobuf-based fsimage does not guarantee the order of inodes and
|
||||
* directories, PBImageTextWriter runs two-phase scans:
|
||||
*
|
||||
* <ol>
|
||||
* <li>The first phase, PBImageTextWriter scans the INode sections to reads the
|
||||
* filename of each directory. It also scans the INode_Dir sections to loads
|
||||
* the relationships between a directory and its children. It uses these metadata
|
||||
* to build FS namespace and stored in {@link MetadataMap}</li>
|
||||
* <li>The second phase, PBImageTextWriter re-scans the INode sections. For each
|
||||
* inode, it looks up the path of the parent directory in the {@link MetadataMap},
|
||||
* and generate output.</li>
|
||||
* </ol>
|
||||
*
|
||||
* Two various of {@link MetadataMap} are provided. {@link InMemoryMetadataDB}
|
||||
* stores all metadata in memory (O(n) memory) while
|
||||
* {@link LevelDBMetadataMap} stores metadata in LevelDB on disk (O(1) memory).
|
||||
* User can choose between them based on the time/space tradeoffs.
|
||||
*/
|
||||
abstract class PBImageTextWriter implements Closeable {
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(PBImageTextWriter.class);
|
||||
|
||||
/**
|
||||
* This metadata map is used to construct the namespace before generating
|
||||
* text outputs.
|
||||
*
|
||||
* It contains two mapping relationships:
|
||||
* <p>
|
||||
* <li>It maps each inode (inode Id) to its parent directory (inode Id).</li>
|
||||
* <li>It maps each directory from its inode Id.</li>
|
||||
* </p>
|
||||
*/
|
||||
private static interface MetadataMap extends Closeable {
|
||||
/**
|
||||
* Associate an inode with its parent directory.
|
||||
*/
|
||||
public void putDirChild(long parentId, long childId) throws IOException;
|
||||
|
||||
/**
|
||||
* Associate a directory with its inode Id.
|
||||
*/
|
||||
public void putDir(INode dir) throws IOException;
|
||||
|
||||
/** Get the full path of the parent directory for the given inode. */
|
||||
public String getParentPath(long inode) throws IOException;
|
||||
|
||||
/** Synchronize metadata to persistent storage, if possible */
|
||||
public void sync() throws IOException;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maintain all the metadata in memory.
|
||||
*/
|
||||
private static class InMemoryMetadataDB implements MetadataMap {
|
||||
/**
|
||||
* Represent a directory in memory.
|
||||
*/
|
||||
private static class Dir {
|
||||
private final long inode;
|
||||
private Dir parent = null;
|
||||
private String name;
|
||||
private String path = null; // cached full path of the directory.
|
||||
|
||||
Dir(long inode, String name) {
|
||||
this.inode = inode;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
private void setParent(Dir parent) {
|
||||
Preconditions.checkState(this.parent == null);
|
||||
this.parent = parent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the full path of this directory.
|
||||
*/
|
||||
private String getPath() {
|
||||
if (this.parent == null) {
|
||||
return "/";
|
||||
}
|
||||
if (this.path == null) {
|
||||
this.path = new File(parent.getPath(), name).toString();
|
||||
this.name = null;
|
||||
}
|
||||
return this.path;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
return o instanceof Dir && inode == ((Dir) o).inode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Long.valueOf(inode).hashCode();
|
||||
}
|
||||
}
|
||||
|
||||
/** INode Id to Dir object mapping */
|
||||
private Map<Long, Dir> dirMap = new HashMap<>();
|
||||
|
||||
/** Children to parent directory INode ID mapping. */
|
||||
private Map<Long, Dir> dirChildMap = new HashMap<>();
|
||||
|
||||
InMemoryMetadataDB() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putDirChild(long parentId, long childId) {
|
||||
Dir parent = dirMap.get(parentId);
|
||||
Dir child = dirMap.get(childId);
|
||||
if (child != null) {
|
||||
child.setParent(parent);
|
||||
}
|
||||
Preconditions.checkState(!dirChildMap.containsKey(childId));
|
||||
dirChildMap.put(childId, parent);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putDir(INode p) {
|
||||
Preconditions.checkState(!dirMap.containsKey(p.getId()));
|
||||
Dir dir = new Dir(p.getId(), p.getName().toStringUtf8());
|
||||
dirMap.put(p.getId(), dir);
|
||||
}
|
||||
|
||||
public String getParentPath(long inode) throws IOException {
|
||||
if (inode == INodeId.ROOT_INODE_ID) {
|
||||
return "";
|
||||
}
|
||||
Dir parent = dirChildMap.get(inode);
|
||||
Preconditions.checkState(parent != null,
|
||||
"Can not find parent directory for INode: %s", inode);
|
||||
return parent.getPath();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void sync() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A MetadataMap that stores metadata in LevelDB.
|
||||
*/
|
||||
private static class LevelDBMetadataMap implements MetadataMap {
|
||||
/**
|
||||
* Store metadata in LevelDB.
|
||||
*/
|
||||
private static class LevelDBStore implements Closeable {
|
||||
private DB db = null;
|
||||
private WriteBatch batch = null;
|
||||
private int writeCount = 0;
|
||||
private static final int BATCH_SIZE = 1024;
|
||||
|
||||
LevelDBStore(final File dbPath) throws IOException {
|
||||
Options options = new Options();
|
||||
options.createIfMissing(true);
|
||||
options.errorIfExists(true);
|
||||
db = JniDBFactory.factory.open(dbPath, options);
|
||||
batch = db.createWriteBatch();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (batch != null) {
|
||||
IOUtils.cleanup(null, batch);
|
||||
batch = null;
|
||||
}
|
||||
IOUtils.cleanup(null, db);
|
||||
db = null;
|
||||
}
|
||||
|
||||
public void put(byte[] key, byte[] value) throws IOException {
|
||||
batch.put(key, value);
|
||||
writeCount++;
|
||||
if (writeCount >= BATCH_SIZE) {
|
||||
sync();
|
||||
}
|
||||
}
|
||||
|
||||
public byte[] get(byte[] key) throws IOException {
|
||||
return db.get(key);
|
||||
}
|
||||
|
||||
public void sync() throws IOException {
|
||||
try {
|
||||
db.write(batch);
|
||||
} finally {
|
||||
batch.close();
|
||||
batch = null;
|
||||
}
|
||||
batch = db.createWriteBatch();
|
||||
writeCount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A LRU cache for directory path strings.
|
||||
*
|
||||
* The key of this LRU cache is the inode of a directory.
|
||||
*/
|
||||
private static class DirPathCache extends LinkedHashMap<Long, String> {
|
||||
private final static int CAPACITY = 16 * 1024;
|
||||
|
||||
DirPathCache() {
|
||||
super(CAPACITY);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean removeEldestEntry(Map.Entry<Long, String> entry) {
|
||||
return super.size() > CAPACITY;
|
||||
}
|
||||
}
|
||||
|
||||
/** Map the child inode to the parent directory inode. */
|
||||
private LevelDBStore dirChildMap = null;
|
||||
/** Directory entry map */
|
||||
private LevelDBStore dirMap = null;
|
||||
private DirPathCache dirPathCache = new DirPathCache();
|
||||
|
||||
LevelDBMetadataMap(String baseDir) throws IOException {
|
||||
File dbDir = new File(baseDir);
|
||||
if (dbDir.exists()) {
|
||||
FileUtils.deleteDirectory(dbDir);
|
||||
}
|
||||
if (!dbDir.mkdirs()) {
|
||||
throw new IOException("Failed to mkdir on " + dbDir);
|
||||
}
|
||||
try {
|
||||
dirChildMap = new LevelDBStore(new File(dbDir, "dirChildMap"));
|
||||
dirMap = new LevelDBStore(new File(dbDir, "dirMap"));
|
||||
} catch (IOException e) {
|
||||
LOG.error("Failed to open LevelDBs", e);
|
||||
IOUtils.cleanup(null, this);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
IOUtils.cleanup(null, dirChildMap, dirMap);
|
||||
dirChildMap = null;
|
||||
dirMap = null;
|
||||
}
|
||||
|
||||
private static byte[] toBytes(long value) {
|
||||
return ByteBuffer.allocate(8).putLong(value).array();
|
||||
}
|
||||
|
||||
private static byte[] toBytes(String value)
|
||||
throws UnsupportedEncodingException {
|
||||
return value.getBytes("UTF-8");
|
||||
}
|
||||
|
||||
private static long toLong(byte[] bytes) {
|
||||
Preconditions.checkArgument(bytes.length == 8);
|
||||
return ByteBuffer.wrap(bytes).getLong();
|
||||
}
|
||||
|
||||
private static String toString(byte[] bytes) throws IOException {
|
||||
try {
|
||||
return new String(bytes, "UTF-8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putDirChild(long parentId, long childId) throws IOException {
|
||||
dirChildMap.put(toBytes(childId), toBytes(parentId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putDir(INode dir) throws IOException {
|
||||
Preconditions.checkArgument(dir.hasDirectory(),
|
||||
"INode %s (%s) is not a directory.", dir.getId(), dir.getName());
|
||||
dirMap.put(toBytes(dir.getId()), toBytes(dir.getName().toStringUtf8()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getParentPath(long inode) throws IOException {
|
||||
if (inode == INodeId.ROOT_INODE_ID) {
|
||||
return "/";
|
||||
}
|
||||
byte[] bytes = dirChildMap.get(toBytes(inode));
|
||||
Preconditions.checkState(bytes != null && bytes.length == 8,
|
||||
"Can not find parent directory for inode %s, "
|
||||
+ "fsimage might be corrupted", inode);
|
||||
long parent = toLong(bytes);
|
||||
if (!dirPathCache.containsKey(parent)) {
|
||||
bytes = dirMap.get(toBytes(parent));
|
||||
if (parent != INodeId.ROOT_INODE_ID) {
|
||||
Preconditions.checkState(bytes != null,
|
||||
"Can not find parent directory for inode %s, "
|
||||
+ ", the fsimage might be corrupted.", parent);
|
||||
}
|
||||
String parentName = toString(bytes);
|
||||
String parentPath =
|
||||
new File(getParentPath(parent), parentName).toString();
|
||||
dirPathCache.put(parent, parentPath);
|
||||
}
|
||||
return dirPathCache.get(parent);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void sync() throws IOException {
|
||||
dirChildMap.sync();
|
||||
dirMap.sync();
|
||||
}
|
||||
}
|
||||
|
||||
private String[] stringTable;
|
||||
private PrintStream out;
|
||||
private MetadataMap metadataMap = null;
|
||||
|
||||
/**
|
||||
* Construct a PB FsImage writer to generate text file.
|
||||
* @param out the writer to output text information of fsimage.
|
||||
* @param tempPath the path to store metadata. If it is empty, store metadata
|
||||
* in memory instead.
|
||||
*/
|
||||
PBImageTextWriter(PrintStream out, String tempPath) throws IOException {
|
||||
this.out = out;
|
||||
if (tempPath.isEmpty()) {
|
||||
metadataMap = new InMemoryMetadataDB();
|
||||
} else {
|
||||
metadataMap = new LevelDBMetadataMap(tempPath);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
IOUtils.cleanup(null, metadataMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get text output for the given inode.
|
||||
* @param parent the path of parent directory
|
||||
* @param inode the INode object to output.
|
||||
*/
|
||||
abstract protected String getEntry(String parent, INode inode);
|
||||
|
||||
public void visit(RandomAccessFile file) throws IOException {
|
||||
Configuration conf = new Configuration();
|
||||
if (!FSImageUtil.checkFileFormat(file)) {
|
||||
throw new IOException("Unrecognized FSImage");
|
||||
}
|
||||
|
||||
FileSummary summary = FSImageUtil.loadSummary(file);
|
||||
|
||||
try (FileInputStream fin = new FileInputStream(file.getFD())) {
|
||||
InputStream is;
|
||||
ArrayList<FileSummary.Section> sections =
|
||||
Lists.newArrayList(summary.getSectionsList());
|
||||
Collections.sort(sections,
|
||||
new Comparator<FileSummary.Section>() {
|
||||
@Override
|
||||
public int compare(FsImageProto.FileSummary.Section s1,
|
||||
FsImageProto.FileSummary.Section s2) {
|
||||
FSImageFormatProtobuf.SectionName n1 =
|
||||
FSImageFormatProtobuf.SectionName.fromString(s1.getName());
|
||||
FSImageFormatProtobuf.SectionName n2 =
|
||||
FSImageFormatProtobuf.SectionName.fromString(s2.getName());
|
||||
if (n1 == null) {
|
||||
return n2 == null ? 0 : -1;
|
||||
} else if (n2 == null) {
|
||||
return -1;
|
||||
} else {
|
||||
return n1.ordinal() - n2.ordinal();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
for (FileSummary.Section section : sections) {
|
||||
fin.getChannel().position(section.getOffset());
|
||||
is = FSImageUtil.wrapInputStreamForCompression(conf,
|
||||
summary.getCodec(), new BufferedInputStream(new LimitInputStream(
|
||||
fin, section.getLength())));
|
||||
switch (SectionName.fromString(section.getName())) {
|
||||
case STRING_TABLE:
|
||||
stringTable = FSImageLoader.loadStringTable(is);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
loadDirectories(fin, sections, summary, conf);
|
||||
loadINodeDirSection(fin, sections, summary, conf);
|
||||
metadataMap.sync();
|
||||
output(conf, summary, fin, sections);
|
||||
}
|
||||
}
|
||||
|
||||
private void output(Configuration conf, FileSummary summary,
|
||||
FileInputStream fin, ArrayList<FileSummary.Section> sections)
|
||||
throws IOException {
|
||||
InputStream is;
|
||||
long startTime = Time.monotonicNow();
|
||||
for (FileSummary.Section section : sections) {
|
||||
if (SectionName.fromString(section.getName()) == SectionName.INODE) {
|
||||
fin.getChannel().position(section.getOffset());
|
||||
is = FSImageUtil.wrapInputStreamForCompression(conf,
|
||||
summary.getCodec(), new BufferedInputStream(new LimitInputStream(
|
||||
fin, section.getLength())));
|
||||
outputINodes(is);
|
||||
}
|
||||
}
|
||||
long timeTaken = Time.monotonicNow() - startTime;
|
||||
LOG.debug("Time to output inodes: {}ms", timeTaken);
|
||||
}
|
||||
|
||||
protected PermissionStatus getPermission(long perm) {
|
||||
return FSImageFormatPBINode.Loader.loadPermission(perm, stringTable);
|
||||
}
|
||||
|
||||
/** Load the directories in the INode section. */
|
||||
private void loadDirectories(
|
||||
FileInputStream fin, List<FileSummary.Section> sections,
|
||||
FileSummary summary, Configuration conf)
|
||||
throws IOException {
|
||||
LOG.info("Loading directories");
|
||||
long startTime = Time.monotonicNow();
|
||||
for (FileSummary.Section section : sections) {
|
||||
if (SectionName.fromString(section.getName())
|
||||
== SectionName.INODE) {
|
||||
fin.getChannel().position(section.getOffset());
|
||||
InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
|
||||
summary.getCodec(), new BufferedInputStream(new LimitInputStream(
|
||||
fin, section.getLength())));
|
||||
loadDirectoriesInINodeSection(is);
|
||||
}
|
||||
}
|
||||
long timeTaken = Time.monotonicNow() - startTime;
|
||||
LOG.info("Finished loading directories in {}ms", timeTaken);
|
||||
}
|
||||
|
||||
private void loadINodeDirSection(
|
||||
FileInputStream fin, List<FileSummary.Section> sections,
|
||||
FileSummary summary, Configuration conf)
|
||||
throws IOException {
|
||||
LOG.info("Loading INode directory section.");
|
||||
long startTime = Time.monotonicNow();
|
||||
for (FileSummary.Section section : sections) {
|
||||
if (SectionName.fromString(section.getName())
|
||||
== SectionName.INODE_DIR) {
|
||||
fin.getChannel().position(section.getOffset());
|
||||
InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
|
||||
summary.getCodec(), new BufferedInputStream(
|
||||
new LimitInputStream(fin, section.getLength())));
|
||||
buildNamespace(is);
|
||||
}
|
||||
}
|
||||
long timeTaken = Time.monotonicNow() - startTime;
|
||||
LOG.info("Finished loading INode directory section in {}ms", timeTaken);
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the filenames of the directories from the INode section.
|
||||
*/
|
||||
private void loadDirectoriesInINodeSection(InputStream in) throws IOException {
|
||||
INodeSection s = INodeSection.parseDelimitedFrom(in);
|
||||
LOG.info("Loading directories in INode section.");
|
||||
int numDirs = 0;
|
||||
for (int i = 0; i < s.getNumInodes(); ++i) {
|
||||
INode p = INode.parseDelimitedFrom(in);
|
||||
if (LOG.isDebugEnabled() && i % 10000 == 0) {
|
||||
LOG.debug("Scanned {} inodes.", i);
|
||||
}
|
||||
if (p.hasDirectory()) {
|
||||
metadataMap.putDir(p);
|
||||
numDirs++;
|
||||
}
|
||||
}
|
||||
LOG.info("Found {} directories in INode section.", numDirs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan the INodeDirectory section to construct the namespace.
|
||||
*/
|
||||
private void buildNamespace(InputStream in) throws IOException {
|
||||
int count = 0;
|
||||
while (true) {
|
||||
FsImageProto.INodeDirectorySection.DirEntry e =
|
||||
FsImageProto.INodeDirectorySection.DirEntry.parseDelimitedFrom(in);
|
||||
if (e == null) {
|
||||
break;
|
||||
}
|
||||
count++;
|
||||
if (LOG.isDebugEnabled() && count % 10000 == 0) {
|
||||
LOG.debug("Scanned {} directories.", count);
|
||||
}
|
||||
long parentId = e.getParent();
|
||||
// Referred INode is not support for now.
|
||||
for (int i = 0; i < e.getChildrenCount(); i++) {
|
||||
long childId = e.getChildren(i);
|
||||
metadataMap.putDirChild(parentId, childId);
|
||||
}
|
||||
Preconditions.checkState(e.getRefChildrenCount() == 0);
|
||||
}
|
||||
LOG.info("Scanned {} INode directories to build namespace.", count);
|
||||
}
|
||||
|
||||
private void outputINodes(InputStream in) throws IOException {
|
||||
INodeSection s = INodeSection.parseDelimitedFrom(in);
|
||||
LOG.info("Found {} INodes in the INode section", s.getNumInodes());
|
||||
for (int i = 0; i < s.getNumInodes(); ++i) {
|
||||
INode p = INode.parseDelimitedFrom(in);
|
||||
String parentPath = metadataMap.getParentPath(p.getId());
|
||||
out.println(getEntry(parentPath, p));
|
||||
|
||||
if (LOG.isDebugEnabled() && i % 100000 == 0) {
|
||||
LOG.debug("Outputted {} INodes.", i);
|
||||
}
|
||||
}
|
||||
LOG.info("Outputted {} INodes.", s.getNumInodes());
|
||||
}
|
||||
}
|
@ -20,22 +20,26 @@
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.PrintStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.io.StringReader;
|
||||
import java.io.StringWriter;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -51,6 +55,7 @@
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileSystemTestHelper;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
@ -329,6 +334,51 @@ public void testWebImageViewer() throws Exception {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPBDelimitedWriter() throws IOException, InterruptedException {
|
||||
testPBDelimitedWriter(""); // Test in memory db.
|
||||
testPBDelimitedWriter(
|
||||
new FileSystemTestHelper().getTestRootDir() + "/delimited.db");
|
||||
}
|
||||
|
||||
private void testPBDelimitedWriter(String db)
|
||||
throws IOException, InterruptedException {
|
||||
final String DELIMITER = "\t";
|
||||
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||
|
||||
try (PrintStream o = new PrintStream(output)) {
|
||||
PBImageDelimitedTextWriter v =
|
||||
new PBImageDelimitedTextWriter(o, DELIMITER, db);
|
||||
v.visit(new RandomAccessFile(originalFsimage, "r"));
|
||||
}
|
||||
|
||||
Set<String> fileNames = new HashSet<>();
|
||||
try (
|
||||
ByteArrayInputStream input =
|
||||
new ByteArrayInputStream(output.toByteArray());
|
||||
BufferedReader reader =
|
||||
new BufferedReader(new InputStreamReader(input))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
System.out.println(line);
|
||||
String[] fields = line.split(DELIMITER);
|
||||
assertEquals(12, fields.length);
|
||||
fileNames.add(fields[0]);
|
||||
}
|
||||
}
|
||||
|
||||
// writtenFiles does not contain root directory and "invalid XML char" dir.
|
||||
for (Iterator<String> it = fileNames.iterator(); it.hasNext(); ) {
|
||||
String filename = it.next();
|
||||
if (filename.startsWith("/dirContainingInvalidXMLChar")) {
|
||||
it.remove();
|
||||
} else if (filename.equals("/")) {
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
assertEquals(writtenFiles.keySet(), fileNames);
|
||||
}
|
||||
|
||||
private static void compareFile(FileStatus expected, FileStatus status) {
|
||||
assertEquals(expected.getAccessTime(), status.getAccessTime());
|
||||
assertEquals(expected.getBlockSize(), status.getBlockSize());
|
||||
|
Loading…
Reference in New Issue
Block a user