diff --git a/hadoop-tools/hadoop-fs2img/pom.xml b/hadoop-tools/hadoop-fs2img/pom.xml
new file mode 100644
index 0000000000..36096b7d9f
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/pom.xml
@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+                             http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hadoop</groupId>
+    <artifactId>hadoop-project</artifactId>
+    <version>3.0.0-alpha3-SNAPSHOT</version>
+    <relativePath>../../hadoop-project</relativePath>
+  </parent>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-fs2img</artifactId>
+  <version>3.0.0-alpha3-SNAPSHOT</version>
+  <description>fs2img</description>
+  <name>fs2img</name>
+  <packaging>jar</packaging>
+
+  <properties>
+    <hadoop.log.dir>${project.build.directory}/log</hadoop.log.dir>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-minicluster</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <archive>
+            <manifest>
+              <mainClass>org.apache.hadoop.hdfs.server.namenode.FileSystemImage</mainClass>
+            </manifest>
+          </archive>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java
new file mode 100644
index 0000000000..94b92b833f
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+
+/**
+ * Given an external reference, create a sequence of blocks and associated
+ * metadata.
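+ *
+ * <p>A minimal usage sketch (illustrative only; {@code conf} and {@code status}
+ * are assumed to be an existing Configuration and the FileStatus of a file in
+ * the external store):
+ * <pre>{@code
+ *   BlockResolver resolver =
+ *       ReflectionUtils.newInstance(FixedBlockResolver.class, conf);
+ *   for (BlockProto block : resolver.resolve(status)) {
+ *     long id = block.getBlockId();      // ids are allocated via nextId()
+ *     long len = block.getNumBytes();    // bounded by preferredBlockSize()
+ *   }
+ * }</pre>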
+ */
+public abstract class BlockResolver {
+
+ protected BlockProto buildBlock(long blockId, long bytes) {
+ return buildBlock(blockId, bytes, 1001);
+ }
+
+ protected BlockProto buildBlock(long blockId, long bytes, long genstamp) {
+ BlockProto.Builder b = BlockProto.newBuilder()
+ .setBlockId(blockId)
+ .setNumBytes(bytes)
+ .setGenStamp(genstamp);
+ return b.build();
+ }
+
+ /**
+ * @param s the external reference.
+ * @return sequence of blocks that make up the reference.
+ */
+ public Iterable<BlockProto> resolve(FileStatus s) {
+ List<Long> lengths = blockLengths(s);
+ ArrayList<BlockProto> ret = new ArrayList<>(lengths.size());
+ long tot = 0;
+ for (long l : lengths) {
+ tot += l;
+ ret.add(buildBlock(nextId(), l));
+ }
+ if (tot != s.getLen()) {
+ // log a warning?
+ throw new IllegalStateException(
+ "Expected " + s.getLen() + " found " + tot);
+ }
+ return ret;
+ }
+
+ /**
+ * @return the next block id.
+ */
+ public abstract long nextId();
+
+ /**
+ * @return the maximum sequentially allocated block ID for this filesystem.
+ */
+ protected abstract long lastId();
+
+ /**
+ * @param status the external reference.
+ * @return the lengths of the resultant blocks.
+ */
+ protected abstract List<Long> blockLengths(FileStatus status);
+
+
+ /**
+ * @param status the external reference.
+ * @return the block size to assign to this external reference.
+ */
+ public long preferredBlockSize(FileStatus status) {
+ return status.getBlockSize();
+ }
+
+ /**
+ * @param status the external reference.
+ * @return the replication to assign to this external reference.
+ */
+ public abstract int getReplication(FileStatus status);
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java
new file mode 100644
index 0000000000..f73611244d
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.ConcurrentModificationException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Traversal of an external FileSystem.
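+ *
+ * <p>A rough usage sketch (the source path, {@code conf} and {@code opts} are
+ * placeholders); the walk descends into a directory only after the
+ * corresponding entry has been accepted, e.g. by {@link ImageWriter#accept}:
+ * <pre>{@code
+ *   try (ImageWriter w = new ImageWriter(opts)) {
+ *     for (TreePath e : new FSTreeWalk(new Path("file:///data"), conf)) {
+ *       w.accept(e);   // assigns an inode id and enqueues e's children
+ *     }
+ *   }
+ * }</pre>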
+ */
+public class FSTreeWalk extends TreeWalk {
+
+ private final Path root;
+ private final FileSystem fs;
+
+ public FSTreeWalk(Path root, Configuration conf) throws IOException {
+ this.root = root;
+ fs = root.getFileSystem(conf);
+ }
+
+ @Override
+ protected Iterable<TreePath> getChildren(TreePath path, long id,
+ TreeIterator i) {
+ // TODO symlinks
+ if (!path.getFileStatus().isDirectory()) {
+ return Collections.emptyList();
+ }
+ try {
+ ArrayList<TreePath> ret = new ArrayList<>();
+ for (FileStatus s : fs.listStatus(path.getFileStatus().getPath())) {
+ ret.add(new TreePath(s, id, i));
+ }
+ return ret;
+ } catch (FileNotFoundException e) {
+ throw new ConcurrentModificationException("FS modified");
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ class FSTreeIterator extends TreeIterator {
+
+ private FSTreeIterator() {
+ }
+
+ FSTreeIterator(TreePath p) {
+ getPendingQueue().addFirst(
+ new TreePath(p.getFileStatus(), p.getParentId(), this));
+ }
+
+ FSTreeIterator(Path p) throws IOException {
+ try {
+ FileStatus s = fs.getFileStatus(p);
+ getPendingQueue().addFirst(new TreePath(s, -1L, this));
+ } catch (FileNotFoundException e) {
+ if (p.equals(root)) {
+ throw e;
+ }
+ throw new ConcurrentModificationException("FS modified");
+ }
+ }
+
+ @Override
+ public TreeIterator fork() {
+ if (getPendingQueue().isEmpty()) {
+ return new FSTreeIterator();
+ }
+ return new FSTreeIterator(getPendingQueue().removeFirst());
+ }
+
+ }
+
+ @Override
+ public TreeIterator iterator() {
+ try {
+ return new FSTreeIterator(root);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java
new file mode 100644
index 0000000000..e1e85c1a64
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.common.BlockFormat;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Create FSImage from an external namespace.
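+ *
+ * <p>Typical invocation through {@link ToolRunner} (the output directory and
+ * source URI below are placeholders; the flags are the ones defined in
+ * {@code options()}):
+ * <pre>{@code
+ *   int ret = ToolRunner.run(new FileSystemImage(),
+ *       new String[] { "-o", "/tmp/name", "file:///data" });
+ * }</pre>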
+ */
+public class FileSystemImage implements Tool {
+
+ private Configuration conf;
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ // require absolute URI to write anywhere but local
+ FileSystem.setDefaultUri(conf, new File(".").toURI().toString());
+ }
+
+ protected void printUsage() {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("fs2img [OPTIONS] URI", new Options());
+ formatter.setSyntaxPrefix("");
+ formatter.printHelp("Options", options());
+ ToolRunner.printGenericCommandUsage(System.out);
+ }
+
+ static Options options() {
+ Options options = new Options();
+ options.addOption("o", "outdir", true, "Output directory");
+ options.addOption("u", "ugiclass", true, "UGI resolver class");
+ options.addOption("b", "blockclass", true, "Block output class");
+ options.addOption("i", "blockidclass", true, "Block resolver class");
+ options.addOption("c", "cachedirs", true, "Max active dirents");
+ options.addOption("h", "help", false, "Print usage");
+ return options;
+ }
+
+ @Override
+ public int run(String[] argv) throws Exception {
+ Options options = options();
+ CommandLineParser parser = new PosixParser();
+ CommandLine cmd;
+ try {
+ cmd = parser.parse(options, argv);
+ } catch (ParseException e) {
+ System.out.println(
+ "Error parsing command-line options: " + e.getMessage());
+ printUsage();
+ return -1;
+ }
+
+ if (cmd.hasOption("h")) {
+ printUsage();
+ return -1;
+ }
+
+ ImageWriter.Options opts =
+ ReflectionUtils.newInstance(ImageWriter.Options.class, getConf());
+ for (Option o : cmd.getOptions()) {
+ switch (o.getOpt()) {
+ case "o":
+ opts.output(o.getValue());
+ break;
+ case "u":
+ opts.ugi(Class.forName(o.getValue()).asSubclass(UGIResolver.class));
+ break;
+ case "b":
+ opts.blocks(
+ Class.forName(o.getValue()).asSubclass(BlockFormat.class));
+ break;
+ case "i":
+ opts.blockIds(
+ Class.forName(o.getValue()).asSubclass(BlockResolver.class));
+ break;
+ case "c":
+ opts.cache(Integer.parseInt(o.getValue()));
+ break;
+ default:
+ throw new UnsupportedOperationException("Internal error");
+ }
+ }
+
+ String[] rem = cmd.getArgs();
+ if (rem.length != 1) {
+ printUsage();
+ return -1;
+ }
+
+ try (ImageWriter w = new ImageWriter(opts)) {
+ for (TreePath e : new FSTreeWalk(new Path(rem[0]), getConf())) {
+ w.accept(e); // add and continue
+ }
+ }
+ return 0;
+ }
+
+ public static void main(String[] argv) throws Exception {
+ int ret = ToolRunner.run(new FileSystemImage(), argv);
+ System.exit(ret);
+ }
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java
new file mode 100644
index 0000000000..0c8ce6e8df
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+
+/**
+ * Resolver mapping all files to a configurable, uniform blocksize
+ * and replication.
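+ *
+ * <p>For example (illustrative value): {@code
+ * conf.setInt(FixedBlockMultiReplicaResolver.REPLICATION, 3)} records a
+ * replication of three for every file in the generated image.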
+ */
+public class FixedBlockMultiReplicaResolver extends FixedBlockResolver {
+
+ public static final String REPLICATION =
+ "hdfs.image.writer.resolver.fixed.block.replication";
+
+ private int replication;
+
+ @Override
+ public void setConf(Configuration conf) {
+ super.setConf(conf);
+ replication = conf.getInt(REPLICATION, 1);
+ }
+
+ public int getReplication(FileStatus s) {
+ return replication;
+ }
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java
new file mode 100644
index 0000000000..8ff9695998
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+
+/**
+ * Resolver mapping all files to a configurable, uniform blocksize.
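+ *
+ * <p>An illustrative configuration (the 128 MB value is an example, not a
+ * default; the keys are the constants defined below):
+ * <pre>{@code
+ *   Configuration conf = new Configuration();
+ *   conf.setLong(FixedBlockResolver.BLOCKSIZE, 128L * (1 << 20));
+ *   BlockResolver resolver =
+ *       ReflectionUtils.newInstance(FixedBlockResolver.class, conf);
+ * }</pre>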
+ */
+public class FixedBlockResolver extends BlockResolver implements Configurable {
+
+ public static final String BLOCKSIZE =
+ "hdfs.image.writer.resolver.fixed.block.size";
+ public static final String START_BLOCK =
+ "hdfs.image.writer.resolver.fixed.block.start";
+
+ private Configuration conf;
+ private long blocksize = 256 * (1L << 20);
+ private final AtomicLong blockIds = new AtomicLong(0);
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ blocksize = conf.getLong(BLOCKSIZE, 256 * (1L << 20));
+ blockIds.set(conf.getLong(START_BLOCK, (1L << 30)));
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ protected List<Long> blockLengths(FileStatus s) {
+ ArrayList<Long> ret = new ArrayList<>();
+ if (!s.isFile()) {
+ return ret;
+ }
+ if (0 == s.getLen()) {
+ // the file has length 0; so we will have one block of size 0
+ ret.add(0L);
+ return ret;
+ }
+ int nblocks = (int)((s.getLen() - 1) / blocksize) + 1;
+ for (int i = 0; i < nblocks - 1; ++i) {
+ ret.add(blocksize);
+ }
+ long rem = s.getLen() % blocksize;
+ ret.add(0 == (rem % blocksize) ? blocksize : rem);
+ return ret;
+ }
+
+ @Override
+ public long nextId() {
+ return blockIds.incrementAndGet();
+ }
+
+ @Override
+ public long lastId() {
+ return blockIds.get();
+ }
+
+ @Override
+ public long preferredBlockSize(FileStatus s) {
+ return blocksize;
+ }
+
+ @Override
+ public int getReplication(FileStatus s) {
+ return 1;
+ }
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java
new file mode 100644
index 0000000000..ca16d962fe
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Dynamically assign ids to users/groups as they appear in the external
+ * filesystem.
+ */
+public class FsUGIResolver extends UGIResolver {
+
+ private int id;
+ private final Set<String> usernames;
+ private final Set<String> groupnames;
+
+ FsUGIResolver() {
+ super();
+ id = 0;
+ usernames = new HashSet<>();
+ groupnames = new HashSet<>();
+ }
+
+ @Override
+ public synchronized void addUser(String name) {
+ if (!usernames.contains(name)) {
+ addUser(name, id);
+ id++;
+ usernames.add(name);
+ }
+ }
+
+ @Override
+ public synchronized void addGroup(String name) {
+ if (!groupnames.contains(name)) {
+ addGroup(name, id);
+ id++;
+ groupnames.add(name);
+ }
+ }
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java
new file mode 100644
index 0000000000..a3603a118e
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java
@@ -0,0 +1,600 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.BufferedOutputStream;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FilterOutputStream;
+import java.io.OutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.security.DigestOutputStream;
+import java.security.MessageDigest;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.atomic.AtomicLong;
+
+import com.google.common.base.Charsets;
+import com.google.protobuf.CodedOutputStream;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.server.common.BlockFormat;
+import org.apache.hadoop.hdfs.server.common.FileRegion;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection.DirEntry;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
+import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressorStream;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
+
+import static org.apache.hadoop.hdfs.server.namenode.FSImageUtil.MAGIC_HEADER;
+
+/**
+ * Utility crawling an existing hierarchical FileSystem and emitting
+ * a valid FSImage/NN storage.
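+ *
+ * <p>A sketch of the expected calling sequence (mirrors
+ * {@link FileSystemImage#run}; the output path and {@code treeWalk} are
+ * placeholders). Entries must be accepted parent-first, since a child's
+ * parentId has to refer to an already-allocated inode id:
+ * <pre>{@code
+ *   ImageWriter.Options opts =
+ *       ReflectionUtils.newInstance(ImageWriter.Options.class, conf);
+ *   opts.output("/tmp/name");
+ *   try (ImageWriter w = new ImageWriter(opts)) {
+ *     for (TreePath e : treeWalk) {   // any TreeWalk, e.g. FSTreeWalk
+ *       w.accept(e);
+ *     }
+ *   }
+ * }</pre>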
+ */
+// TODO: generalize to types beyond FileRegion
+public class ImageWriter implements Closeable {
+
+ private static final int ONDISK_VERSION = 1;
+ private static final int LAYOUT_VERSION = -64; // see NameNodeLayoutVersion
+
+ private final Path outdir;
+ private final FileSystem outfs;
+ private final File dirsTmp;
+ private final OutputStream dirs;
+ private final File inodesTmp;
+ private final OutputStream inodes;
+ private final MessageDigest digest;
+ private final FSImageCompression compress;
+ private final long startBlock;
+ private final long startInode;
+ private final UGIResolver ugis;
+ private final BlockFormat.Writer<FileRegion> blocks;
+ private final BlockResolver blockIds;
+ private final Map<Long, DirEntry.Builder> dircache;
+ private final TrackedOutputStream<DigestOutputStream> raw;
+
+ private boolean closed = false;
+ private long curSec;
+ private long curBlock;
+ private final AtomicLong curInode;
+ private final FileSummary.Builder summary = FileSummary.newBuilder()
+ .setOndiskVersion(ONDISK_VERSION)
+ .setLayoutVersion(LAYOUT_VERSION);
+
+ private final String blockPoolID;
+
+ public static Options defaults() {
+ return new Options();
+ }
+
+ @SuppressWarnings("unchecked")
+ public ImageWriter(Options opts) throws IOException {
+ final OutputStream out;
+ if (null == opts.outStream) {
+ FileSystem fs = opts.outdir.getFileSystem(opts.getConf());
+ outfs = (fs instanceof LocalFileSystem)
+ ? ((LocalFileSystem)fs).getRaw()
+ : fs;
+ Path tmp = opts.outdir;
+ if (!outfs.mkdirs(tmp)) {
+ throw new IOException("Failed to create output dir: " + tmp);
+ }
+ try (NNStorage stor = new NNStorage(opts.getConf(),
+ Arrays.asList(tmp.toUri()), Arrays.asList(tmp.toUri()))) {
+ NamespaceInfo info = NNStorage.newNamespaceInfo();
+ if (info.getLayoutVersion() != LAYOUT_VERSION) {
+ throw new IllegalStateException("Incompatible layout " +
+ info.getLayoutVersion() + " (expected " + LAYOUT_VERSION);
+ }
+ stor.format(info);
+ blockPoolID = info.getBlockPoolID();
+ }
+ outdir = new Path(tmp, "current");
+ out = outfs.create(new Path(outdir, "fsimage_0000000000000000000"));
+ } else {
+ // XXX necessary? writing a NNStorage now...
+ outdir = null;
+ outfs = null;
+ out = opts.outStream;
+ blockPoolID = "";
+ }
+ digest = MD5Hash.getDigester();
+ raw = new TrackedOutputStream<>(new DigestOutputStream(
+ new BufferedOutputStream(out), digest));
+ compress = opts.compress;
+ CompressionCodec codec = compress.getImageCodec();
+ if (codec != null) {
+ summary.setCodec(codec.getClass().getCanonicalName());
+ }
+ startBlock = opts.startBlock;
+ curBlock = startBlock;
+ startInode = opts.startInode;
+ curInode = new AtomicLong(startInode);
+ dircache = Collections.synchronizedMap(new DirEntryCache(opts.maxdircache));
+
+ ugis = null == opts.ugis
+ ? ReflectionUtils.newInstance(opts.ugisClass, opts.getConf())
+ : opts.ugis;
+ BlockFormat<FileRegion> fmt = null == opts.blocks
+ ? ReflectionUtils.newInstance(opts.blockFormatClass, opts.getConf())
+ : opts.blocks;
+ blocks = fmt.getWriter(null);
+ blockIds = null == opts.blockIds
+ ? ReflectionUtils.newInstance(opts.blockIdsClass, opts.getConf())
+ : opts.blockIds;
+
+ // create directory and inode sections as side-files.
+ // The details are written to files to avoid keeping them in memory.
+ dirsTmp = File.createTempFile("fsimg_dir", null);
+ dirsTmp.deleteOnExit();
+ dirs = beginSection(new FileOutputStream(dirsTmp));
+ try {
+ inodesTmp = File.createTempFile("fsimg_inode", null);
+ inodesTmp.deleteOnExit();
+ inodes = new FileOutputStream(inodesTmp);
+ } catch (IOException e) {
+ // appropriate to close raw?
+ IOUtils.cleanup(null, raw, dirs);
+ throw e;
+ }
+
+ raw.write(MAGIC_HEADER);
+ curSec = raw.pos;
+ assert raw.pos == MAGIC_HEADER.length;
+ }
+
+ public void accept(TreePath e) throws IOException {
+ assert e.getParentId() < curInode.get();
+ // allocate ID
+ long id = curInode.getAndIncrement();
+ e.accept(id);
+ assert e.getId() < curInode.get();
+ INode n = e.toINode(ugis, blockIds, blocks, blockPoolID);
+ writeInode(n);
+
+ if (e.getParentId() > 0) {
+ // add DirEntry to map, which may page out entries
+ DirEntry.Builder de = DirEntry.newBuilder()
+ .setParent(e.getParentId())
+ .addChildren(e.getId());
+ dircache.put(e.getParentId(), de);
+ }
+ }
+
+ @SuppressWarnings("serial")
+ class DirEntryCache extends LinkedHashMap<Long, DirEntry.Builder> {
+
+ // should cache the path to the root, not evict LRU-cached entries
+ private final int nEntries;
+
+ DirEntryCache(int nEntries) {
+ this.nEntries = nEntries;
+ }
+
+ @Override
+ public DirEntry.Builder put(Long p, DirEntry.Builder b) {
+ DirEntry.Builder e = get(p);
+ if (null == e) {
+ return super.put(p, b);
+ }
+ //merge
+ e.addAllChildren(b.getChildrenList());
+ // not strictly conforming
+ return e;
+ }
+
+ @Override
+ protected boolean removeEldestEntry(Entry<Long, DirEntry.Builder> be) {
+ if (size() > nEntries) {
+ DirEntry d = be.getValue().build();
+ try {
+ writeDirEntry(d);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return true;
+ }
+ return false;
+ }
+ }
+
+ synchronized void writeInode(INode n) throws IOException {
+ n.writeDelimitedTo(inodes);
+ }
+
+ synchronized void writeDirEntry(DirEntry e) throws IOException {
+ e.writeDelimitedTo(dirs);
+ }
+
+ // from FSImageFormatProtobuf... why not just read position from the stream?
+ private static int getOndiskSize(com.google.protobuf.GeneratedMessage s) {
+ return CodedOutputStream.computeRawVarint32Size(s.getSerializedSize())
+ + s.getSerializedSize();
+ }
+
+ @Override
+ public synchronized void close() throws IOException {
+ if (closed) {
+ return;
+ }
+ for (DirEntry.Builder b : dircache.values()) {
+ DirEntry e = b.build();
+ writeDirEntry(e);
+ }
+ dircache.clear();
+
+ // close side files
+ IOUtils.cleanup(null, dirs, inodes, blocks);
+ if (null == dirs || null == inodes) {
+ // init failed
+ if (raw != null) {
+ raw.close();
+ }
+ return;
+ }
+ try {
+ writeNameSystemSection();
+ writeINodeSection();
+ writeDirSection();
+ writeStringTableSection();
+
+ // write summary directly to raw
+ FileSummary s = summary.build();
+ s.writeDelimitedTo(raw);
+ int length = getOndiskSize(s);
+ byte[] lengthBytes = new byte[4];
+ ByteBuffer.wrap(lengthBytes).asIntBuffer().put(length);
+ raw.write(lengthBytes);
+ } finally {
+ raw.close();
+ }
+ writeMD5("fsimage_0000000000000000000");
+ closed = true;
+ }
+
+ /**
+ * Write checksum for image file. Pulled from MD5Utils/internals. Awkward to
+ * reuse existing tools/utils.
+ */
+ void writeMD5(String imagename) throws IOException {
+ if (null == outdir) {
+ //LOG.warn("Not writing MD5");
+ return;
+ }
+ MD5Hash md5 = new MD5Hash(digest.digest());
+ String digestString = StringUtils.byteToHexString(md5.getDigest());
+ Path chk = new Path(outdir, imagename + ".md5");
+ try (OutputStream out = outfs.create(chk)) {
+ String md5Line = digestString + " *" + imagename + "\n";
+ out.write(md5Line.getBytes(Charsets.UTF_8));
+ }
+ }
+
+ OutputStream beginSection(OutputStream out) throws IOException {
+ CompressionCodec codec = compress.getImageCodec();
+ if (null == codec) {
+ return out;
+ }
+ return codec.createOutputStream(out);
+ }
+
+ void endSection(OutputStream out, SectionName name) throws IOException {
+ CompressionCodec codec = compress.getImageCodec();
+ if (codec != null) {
+ ((CompressorStream)out).finish();
+ }
+ out.flush();
+ long length = raw.pos - curSec;
+ summary.addSections(FileSummary.Section.newBuilder()
+ .setName(name.toString()) // not strictly correct, but name not visible
+ .setOffset(curSec).setLength(length));
+ curSec += length;
+ }
+
+ void writeNameSystemSection() throws IOException {
+ NameSystemSection.Builder b = NameSystemSection.newBuilder()
+ .setGenstampV1(1000)
+ .setGenstampV1Limit(0)
+ .setGenstampV2(1001)
+ .setLastAllocatedBlockId(blockIds.lastId())
+ .setTransactionId(0);
+ NameSystemSection s = b.build();
+
+ OutputStream sec = beginSection(raw);
+ s.writeDelimitedTo(sec);
+ endSection(sec, SectionName.NS_INFO);
+ }
+
+ void writeINodeSection() throws IOException {
+ // could reset dict to avoid compression cost in close
+ INodeSection.Builder b = INodeSection.newBuilder()
+ .setNumInodes(curInode.get() - startInode)
+ .setLastInodeId(curInode.get());
+ INodeSection s = b.build();
+
+ OutputStream sec = beginSection(raw);
+ s.writeDelimitedTo(sec);
+ // copy inodes
+ try (FileInputStream in = new FileInputStream(inodesTmp)) {
+ IOUtils.copyBytes(in, sec, 4096, false);
+ }
+ endSection(sec, SectionName.INODE);
+ }
+
+ void writeDirSection() throws IOException {
+ // No header, so dirs can be written/compressed independently
+ //INodeDirectorySection.Builder b = INodeDirectorySection.newBuilder();
+ OutputStream sec = raw;
+ // copy dirs
+ try (FileInputStream in = new FileInputStream(dirsTmp)) {
+ IOUtils.copyBytes(in, sec, 4096, false);
+ }
+ endSection(sec, SectionName.INODE_DIR);
+ }
+
+ void writeFilesUCSection() throws IOException {
+ FilesUnderConstructionSection.Builder b =
+ FilesUnderConstructionSection.newBuilder();
+ FilesUnderConstructionSection s = b.build();
+
+ OutputStream sec = beginSection(raw);
+ s.writeDelimitedTo(sec);
+ endSection(sec, SectionName.FILES_UNDERCONSTRUCTION);
+ }
+
+ void writeSnapshotDiffSection() throws IOException {
+ SnapshotDiffSection.Builder b = SnapshotDiffSection.newBuilder();
+ SnapshotDiffSection s = b.build();
+
+ OutputStream sec = beginSection(raw);
+ s.writeDelimitedTo(sec);
+ endSection(sec, SectionName.SNAPSHOT_DIFF);
+ }
+
+ void writeSecretManagerSection() throws IOException {
+ SecretManagerSection.Builder b = SecretManagerSection.newBuilder()
+ .setCurrentId(0)
+ .setTokenSequenceNumber(0);
+ SecretManagerSection s = b.build();
+
+ OutputStream sec = beginSection(raw);
+ s.writeDelimitedTo(sec);
+ endSection(sec, SectionName.SECRET_MANAGER);
+ }
+
+ void writeCacheManagerSection() throws IOException {
+ CacheManagerSection.Builder b = CacheManagerSection.newBuilder()
+ .setNumPools(0)
+ .setNumDirectives(0)
+ .setNextDirectiveId(1);
+ CacheManagerSection s = b.build();
+
+ OutputStream sec = beginSection(raw);
+ s.writeDelimitedTo(sec);
+ endSection(sec, SectionName.CACHE_MANAGER);
+ }
+
+ void writeStringTableSection() throws IOException {
+ StringTableSection.Builder b = StringTableSection.newBuilder();
+ Map<Integer, String> u = ugis.ugiMap();
+ b.setNumEntry(u.size());
+ StringTableSection s = b.build();
+
+ OutputStream sec = beginSection(raw);
+ s.writeDelimitedTo(sec);
+ for (Map.Entry<Integer, String> e : u.entrySet()) {
+ StringTableSection.Entry.Builder x =
+ StringTableSection.Entry.newBuilder()
+ .setId(e.getKey())
+ .setStr(e.getValue());
+ x.build().writeDelimitedTo(sec);
+ }
+ endSection(sec, SectionName.STRING_TABLE);
+ }
+
+ @Override
+ public synchronized String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("{ codec=\"").append(compress.getImageCodec());
+ sb.append("\", startBlock=").append(startBlock);
+ sb.append(", curBlock=").append(curBlock);
+ sb.append(", startInode=").append(startInode);
+ sb.append(", curInode=").append(curInode);
+ sb.append(", ugi=").append(ugis);
+ sb.append(", blockIds=").append(blockIds);
+ sb.append(", offset=").append(raw.pos);
+ sb.append(" }");
+ return sb.toString();
+ }
+
+ static class TrackedOutputStream<T extends OutputStream>
+ extends FilterOutputStream {
+
+ private long pos = 0L;
+
+ TrackedOutputStream(T out) {
+ super(out);
+ }
+
+ @SuppressWarnings("unchecked")
+ public T getInner() {
+ return (T) out;
+ }
+
+ @Override
+ public void write(int b) throws IOException {
+ out.write(b);
+ ++pos;
+ }
+
+ @Override
+ public void write(byte[] b) throws IOException {
+ write(b, 0, b.length);
+ }
+
+ @Override
+ public void write(byte[] b, int off, int len) throws IOException {
+ out.write(b, off, len);
+ pos += len;
+ }
+
+ @Override
+ public void flush() throws IOException {
+ super.flush();
+ }
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ }
+
+ }
+
+ /**
+ * Configurable options for image generation, including the pluggable components to use.
+ */
+ public static class Options implements Configurable {
+
+ public static final String START_INODE = "hdfs.image.writer.start.inode";
+ public static final String CACHE_ENTRY = "hdfs.image.writer.cache.entries";
+ public static final String UGI_CLASS = "hdfs.image.writer.ugi.class";
+ public static final String BLOCK_RESOLVER_CLASS =
+ "hdfs.image.writer.blockresolver.class";
+
+ private Path outdir;
+ private Configuration conf;
+ private OutputStream outStream;
+ private int maxdircache;
+ private long startBlock;
+ private long startInode;
+ private UGIResolver ugis;
+ private Class<? extends UGIResolver> ugisClass;
+ private BlockFormat<FileRegion> blocks;
+
+ @SuppressWarnings("rawtypes")
+ private Class<? extends BlockFormat> blockFormatClass;
+ private BlockResolver blockIds;
+ private Class<? extends BlockResolver> blockIdsClass;
+ private FSImageCompression compress =
+ FSImageCompression.createNoopCompression();
+
+ protected Options() {
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ //long lastTxn = conf.getLong(LAST_TXN, 0L);
+ String def = new File("hdfs/name").toURI().toString();
+ outdir = new Path(conf.get(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, def));
+ startBlock = conf.getLong(FixedBlockResolver.START_BLOCK, (1L << 30) + 1);
+ startInode = conf.getLong(START_INODE, (1L << 14) + 1);
+ maxdircache = conf.getInt(CACHE_ENTRY, 100);
+ ugisClass = conf.getClass(UGI_CLASS,
+ SingleUGIResolver.class, UGIResolver.class);
+ blockFormatClass = conf.getClass(
+ DFSConfigKeys.DFS_PROVIDER_BLK_FORMAT_CLASS,
+ NullBlockFormat.class, BlockFormat.class);
+ blockIdsClass = conf.getClass(BLOCK_RESOLVER_CLASS,
+ FixedBlockResolver.class, BlockResolver.class);
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ public Options output(String out) {
+ this.outdir = new Path(out);
+ return this;
+ }
+
+ public Options outStream(OutputStream outStream) {
+ this.outStream = outStream;
+ return this;
+ }
+
+ public Options codec(String codec) throws IOException {
+ this.compress = FSImageCompression.createCompression(getConf(), codec);
+ return this;
+ }
+
+ public Options cache(int nDirEntries) {
+ this.maxdircache = nDirEntries;
+ return this;
+ }
+
+ public Options ugi(UGIResolver ugis) {
+ this.ugis = ugis;
+ return this;
+ }
+
+ public Options ugi(Class<? extends UGIResolver> ugisClass) {
+ this.ugisClass = ugisClass;
+ return this;
+ }
+
+ public Options blockIds(BlockResolver blockIds) {
+ this.blockIds = blockIds;
+ return this;
+ }
+
+ public Options blockIds(Class<? extends BlockResolver> blockIdsClass) {
+ this.blockIdsClass = blockIdsClass;
+ return this;
+ }
+
+ public Options blocks(BlockFormat<FileRegion> blocks) {
+ this.blocks = blocks;
+ return this;
+ }
+
+ @SuppressWarnings("rawtypes")
+ public Options blocks(Class<? extends BlockFormat> blocksClass) {
+ this.blockFormatClass = blocksClass;
+ return this;
+ }
+
+ }
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java
new file mode 100644
index 0000000000..aabdf74820
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.server.common.BlockFormat;
+import org.apache.hadoop.hdfs.server.common.BlockFormat.Reader.Options;
+import org.apache.hadoop.hdfs.server.common.FileRegion;
+
+/**
+ * Null sink for region information emitted from FSImage.
+ */
+public class NullBlockFormat extends BlockFormat<FileRegion> {
+
+ @Override
+ public Reader<FileRegion> getReader(Options opts) throws IOException {
+ return new Reader<FileRegion>() {
+ @Override
+ public Iterator<FileRegion> iterator() {
+ return new Iterator<FileRegion>() {
+ @Override
+ public boolean hasNext() {
+ return false;
+ }
+ @Override
+ public FileRegion next() {
+ throw new NoSuchElementException();
+ }
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+
+ @Override
+ public void close() throws IOException {
+ // do nothing
+ }
+
+ @Override
+ public FileRegion resolve(Block ident) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+
+ @Override
+ public Writer<FileRegion> getWriter(Writer.Options opts) throws IOException {
+ return new Writer<FileRegion>() {
+ @Override
+ public void store(FileRegion token) throws IOException {
+ // do nothing
+ }
+
+ @Override
+ public void close() throws IOException {
+ // do nothing
+ }
+ };
+ }
+
+ @Override
+ public void refresh() throws IOException {
+ // do nothing
+ }
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java
new file mode 100644
index 0000000000..0fd3f2b138
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.security.UserGroupInformation;
+
+/**
+ * Map all owners/groups in the external system to a single user/group in the FSImage.
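+ *
+ * <p>An illustrative configuration (the user/group names are examples; the
+ * keys are the constants defined below):
+ * <pre>{@code
+ *   Configuration conf = new Configuration();
+ *   conf.set(SingleUGIResolver.USER, "hdfs");
+ *   conf.set(SingleUGIResolver.GROUP, "hadoop");
+ *   UGIResolver ugi =
+ *       ReflectionUtils.newInstance(SingleUGIResolver.class, conf);
+ * }</pre>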
+ */
+public class SingleUGIResolver extends UGIResolver implements Configurable {
+
+ public static final String UID = "hdfs.image.writer.ugi.single.uid";
+ public static final String USER = "hdfs.image.writer.ugi.single.user";
+ public static final String GID = "hdfs.image.writer.ugi.single.gid";
+ public static final String GROUP = "hdfs.image.writer.ugi.single.group";
+
+ private int uid;
+ private int gid;
+ private String user;
+ private String group;
+ private Configuration conf;
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ uid = conf.getInt(UID, 0);
+ user = conf.get(USER);
+ if (null == user) {
+ try {
+ user = UserGroupInformation.getCurrentUser().getShortUserName();
+ } catch (IOException e) {
+ user = "hadoop";
+ }
+ }
+ gid = conf.getInt(GID, 1);
+ group = conf.get(GROUP);
+ if (null == group) {
+ group = user;
+ }
+
+ resetUGInfo();
+ addUser(user, uid);
+ addGroup(group, gid);
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public String user(FileStatus s) {
+ return user;
+ }
+
+ @Override
+ public String group(FileStatus s) {
+ return group;
+ }
+
+ @Override
+ public void addUser(String name) {
+ //do nothing
+ }
+
+ @Override
+ public void addGroup(String name) {
+ //do nothing
+ }
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java
new file mode 100644
index 0000000000..14e6bed07f
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+
+import com.google.protobuf.ByteString;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+import org.apache.hadoop.hdfs.server.common.BlockFormat;
+import org.apache.hadoop.hdfs.server.common.FileRegion;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile;
+import static org.apache.hadoop.hdfs.DFSUtil.string2Bytes;
+import static org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA;
+import static org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature.DEFAULT_STORAGE_SPACE_QUOTA;
+
+/**
+ * Traversal cursor in external filesystem.
+ * TODO: generalize, move FS/FileRegion to FSTreePath
+ */
+public class TreePath {
+ private long id = -1;
+ private final long parentId;
+ private final FileStatus stat;
+ private final TreeWalk.TreeIterator i;
+
+ protected TreePath(FileStatus stat, long parentId, TreeWalk.TreeIterator i) {
+ this.i = i;
+ this.stat = stat;
+ this.parentId = parentId;
+ }
+
+ public FileStatus getFileStatus() {
+ return stat;
+ }
+
+ public long getParentId() {
+ return parentId;
+ }
+
+ public long getId() {
+ if (id < 0) {
+ throw new IllegalStateException();
+ }
+ return id;
+ }
+
+ void accept(long id) {
+ this.id = id;
+ i.onAccept(this, id);
+ }
+
+ public INode toINode(UGIResolver ugi, BlockResolver blk,
+ BlockFormat.Writer out, String blockPoolID)
+ throws IOException {
+ if (stat.isFile()) {
+ return toFile(ugi, blk, out, blockPoolID);
+ } else if (stat.isDirectory()) {
+ return toDirectory(ugi);
+ } else if (stat.isSymlink()) {
+ throw new UnsupportedOperationException("symlinks not supported");
+ } else {
+ throw new UnsupportedOperationException("Unknown type: " + stat);
+ }
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof TreePath)) {
+ return false;
+ }
+ TreePath o = (TreePath) other;
+ return getParentId() == o.getParentId()
+ && getFileStatus().equals(o.getFileStatus());
+ }
+
+ @Override
+ public int hashCode() {
+ long pId = getParentId() * getFileStatus().hashCode();
+ return (int)(pId ^ (pId >>> 32));
+ }
+
+ void writeBlock(long blockId, long offset, long length,
+ long genStamp, String blockPoolID,
+ BlockFormat.Writer out) throws IOException {
+ FileStatus s = getFileStatus();
+ out.store(new FileRegion(blockId, s.getPath(), offset, length,
+ blockPoolID, genStamp));
+ }
+
+ INode toFile(UGIResolver ugi, BlockResolver blk,
+ BlockFormat.Writer out, String blockPoolID)
+ throws IOException {
+ final FileStatus s = getFileStatus();
+ // TODO should this store resolver's user/group?
+ ugi.addUser(s.getOwner());
+ ugi.addGroup(s.getGroup());
+ INodeFile.Builder b = INodeFile.newBuilder()
+ .setReplication(blk.getReplication(s))
+ .setModificationTime(s.getModificationTime())
+ .setAccessTime(s.getAccessTime())
+ .setPreferredBlockSize(blk.preferredBlockSize(s))
+ .setPermission(ugi.resolve(s))
+ .setStoragePolicyID(HdfsConstants.PROVIDED_STORAGE_POLICY_ID);
+ //TODO: storage policy should be configurable per path; use BlockResolver
+ long off = 0L;
+ for (BlockProto block : blk.resolve(s)) {
+ b.addBlocks(block);
+ writeBlock(block.getBlockId(), off, block.getNumBytes(),
+ block.getGenStamp(), blockPoolID, out);
+ off += block.getNumBytes();
+ }
+ INode.Builder ib = INode.newBuilder()
+ .setType(INode.Type.FILE)
+ .setId(id)
+ .setName(ByteString.copyFrom(string2Bytes(s.getPath().getName())))
+ .setFile(b);
+ return ib.build();
+ }
+
+ INode toDirectory(UGIResolver ugi) {
+ final FileStatus s = getFileStatus();
+ ugi.addUser(s.getOwner());
+ ugi.addGroup(s.getGroup());
+ INodeDirectory.Builder b = INodeDirectory.newBuilder()
+ .setModificationTime(s.getModificationTime())
+ .setNsQuota(DEFAULT_NAMESPACE_QUOTA)
+ .setDsQuota(DEFAULT_STORAGE_SPACE_QUOTA)
+ .setPermission(ugi.resolve(s));
+ INode.Builder ib = INode.newBuilder()
+ .setType(INode.Type.DIRECTORY)
+ .setId(id)
+ .setName(ByteString.copyFrom(string2Bytes(s.getPath().getName())))
+ .setDirectory(b);
+ return ib.build();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("{ stat=\"").append(getFileStatus()).append("\"");
+ sb.append(", id=").append(getId());
+ sb.append(", parentId=").append(getParentId());
+ sb.append(", iterObjId=").append(System.identityHashCode(i));
+ sb.append(" }");
+ return sb.toString();
+ }
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java
new file mode 100644
index 0000000000..7fd26f9e50
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.Iterator;
+
+/**
+ * Traversal yielding a hierarchical sequence of paths.
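+ *
+ * <p>Note that traversal is accept-driven: children of a node are enqueued by
+ * {@code TreeIterator#onAccept}, which runs when the consumer assigns the node
+ * an id (see {@code TreePath#accept}). A hypothetical consumer loop, where
+ * {@code walk} is any concrete subclass and {@code nextId} a caller-managed
+ * counter:
+ * <pre>{@code
+ *   TreeWalk.TreeIterator it = walk.iterator();
+ *   while (it.hasNext()) {
+ *     TreePath p = it.next();
+ *     p.accept(nextId++);   // pulls p's children into the iteration
+ *   }
+ * }</pre>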
+ */
+public abstract class TreeWalk implements Iterable<TreePath> {
+
+ /**
+ * @param path path to the node being explored.
+ * @param id the id of the node.
+ * @param iterator the {@link TreeIterator} to use.
+ * @return paths representing the children of the current node.
+ */
+ protected abstract Iterable<TreePath> getChildren(
+ TreePath path, long id, TreeWalk.TreeIterator iterator);
+
+ public abstract TreeIterator iterator();
+
+ /**
+ * Enumerator class for hierarchies. Implementations SHOULD support a fork()
+ * operation yielding a subtree of the current cursor.
+ */
+ public abstract class TreeIterator implements Iterator<TreePath> {
+
+ private final Deque<TreePath> pending;
+
+ TreeIterator() {
+ this(new ArrayDeque<TreePath>());
+ }
+
+ protected TreeIterator(Deque<TreePath> pending) {
+ this.pending = pending;
+ }
+
+ public abstract TreeIterator fork();
+
+ @Override
+ public boolean hasNext() {
+ return !pending.isEmpty();
+ }
+
+ @Override
+ public TreePath next() {
+ return pending.removeFirst();
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ protected void onAccept(TreePath p, long id) {
+ for (TreePath k : getChildren(p, id, this)) {
+ pending.addFirst(k);
+ }
+ }
+
+ /**
+ * @return the Deque containing the pending paths.
+ */
+ protected Deque<TreePath> getPendingQueue() {
+ return pending;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("{ Treewalk=\"").append(TreeWalk.this.toString());
+ sb.append(", pending=[");
+ Iterator<TreePath> i = pending.iterator();
+ if (i.hasNext()) {
+ sb.append("\"").append(i.next()).append("\"");
+ }
+ while (i.hasNext()) {
+ sb.append(", \"").append(i.next()).append("\"");
+ }
+ sb.append("]");
+ sb.append(" }");
+ return sb.toString();
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java
new file mode 100644
index 0000000000..2d50668b68
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.permission.FsPermission;
+
+/**
+ * Pluggable class for mapping ownership and permissions from an external
+ * store to an FSImage.
+ */
+public abstract class UGIResolver {
+
+ static final int USER_STRID_OFFSET = 40;
+ static final int GROUP_STRID_OFFSET = 16;
+ static final long USER_GROUP_STRID_MASK = (1 << 24) - 1;
+
+ /**
+ * Permission is serialized as a 64-bit long: [0:24):[24:48):[48:64) (big
+ * endian). The first and second parts are the string ids of the user and
+ * group name, and the last 16 bits are the permission bits.
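+ * For example (illustrative values): a user string id of 2, a group string
+ * id of 3 and permission 0755 encode as
+ * {@code (2L << 40) | (3L << 16) | 0755}, i.e. {@code 0x200000301EDL}.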
+ * @param owner name of owner
+ * @param group name of group
+ * @param permission Permission octets
+ * @return FSImage encoding of permissions
+ */
+ protected final long buildPermissionStatus(
+ String owner, String group, short permission) {
+
+ long userId = users.get(owner);
+ if (0L != ((~USER_GROUP_STRID_MASK) & userId)) {
+ throw new IllegalArgumentException("UID must fit in 24 bits");
+ }
+
+ long groupId = groups.get(group);
+ if (0L != ((~USER_GROUP_STRID_MASK) & groupId)) {
+ throw new IllegalArgumentException("GID must fit in 24 bits");
+ }
+ return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET)
+ | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET)
+ | permission;
+ }
+
+ private final Map<String, Integer> users;
+ private final Map<String, Integer> groups;
+
+ public UGIResolver() {
+ this(new HashMap<String, Integer>(), new HashMap<String, Integer>());
+ }
+
+ UGIResolver(Map<String, Integer> users, Map<String, Integer> groups) {
+ this.users = users;
+ this.groups = groups;
+ }
+
+ public Map<Integer, String> ugiMap() {
+ Map<Integer, String> ret = new HashMap<>();
+ for (Map<String, Integer> m : Arrays.asList(users, groups)) {
+ for (Map.Entry<String, Integer> e : m.entrySet()) {
+ String s = ret.put(e.getValue(), e.getKey());
+ if (s != null) {
+ throw new IllegalStateException("Duplicate mapping: " +
+ e.getValue() + " " + s + " " + e.getKey());
+ }
+ }
+ }
+ return ret;
+ }
+
+ public abstract void addUser(String name);
+
+ protected void addUser(String name, int id) {
+ Integer uid = users.put(name, id);
+ if (uid != null) {
+ throw new IllegalArgumentException("Duplicate mapping: " + name +
+ " " + uid + " " + id);
+ }
+ }
+
+ public abstract void addGroup(String name);
+
+ protected void addGroup(String name, int id) {
+ Integer gid = groups.put(name, id);
+ if (gid != null) {
+ throw new IllegalArgumentException("Duplicate mapping: " + name +
+ " " + gid + " " + id);
+ }
+ }
+
+ protected void resetUGInfo() {
+ users.clear();
+ groups.clear();
+ }
+
+ public long resolve(FileStatus s) {
+ return buildPermissionStatus(user(s), group(s), permission(s).toShort());
+ }
+
+ public String user(FileStatus s) {
+ return s.getOwner();
+ }
+
+ public String group(FileStatus s) {
+ return s.getGroup();
+ }
+
+ public FsPermission permission(FileStatus s) {
+ return s.getPermission();
+ }
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java
new file mode 100644
index 0000000000..956292e4ea
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+package org.apache.hadoop.hdfs.server.namenode;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java
new file mode 100644
index 0000000000..c82c489463
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Random, repeatable hierarchy generator.
+ */
+public class RandomTreeWalk extends TreeWalk {
+
+ private final Path root;
+ private final long seed;
+ private final float depth;
+ private final int children;
+ private final Map<Long, Long> mSeed;
+ //private final AtomicLong blockIds = new AtomicLong(1L << 30);
+
+ RandomTreeWalk(long seed) {
+ this(seed, 10);
+ }
+
+ RandomTreeWalk(long seed, int children) {
+ this(seed, children, 0.15f);
+ }
+
+ RandomTreeWalk(long seed, int children, float depth) {
+ this(randomRoot(seed), seed, children, depth);
+ }
+
+ RandomTreeWalk(Path root, long seed, int children, float depth) {
+ this.seed = seed;
+ this.depth = depth;
+ this.children = children;
+ mSeed = Collections.synchronizedMap(new HashMap<Long, Long>());
+ mSeed.put(-1L, seed);
+ this.root = root;
+ }
+
+ static Path randomRoot(long seed) {
+ Random r = new Random(seed);
+ String scheme;
+ do {
+ scheme = genName(r, 3, 5).toLowerCase();
+ } while (Character.isDigit(scheme.charAt(0)));
+ String authority = genName(r, 3, 15).toLowerCase();
+ int port = r.nextInt(1 << 13) + 1000;
+ return new Path(scheme, authority + ":" + port, "/");
+ }
+
+ @Override
+ public TreeIterator iterator() {
+ return new RandomTreeIterator(seed);
+ }
+
+ @Override
+ protected Iterable<TreePath> getChildren(TreePath p, long id,
+ TreeIterator walk) {
+ final FileStatus pFs = p.getFileStatus();
+ if (pFs.isFile()) {
+ return Collections.emptyList();
+ }
+ // seed is f(parent seed, attrib)
+ long cseed = mSeed.get(p.getParentId()) * p.getFileStatus().hashCode();
+ mSeed.put(p.getId(), cseed);
+ Random r = new Random(cseed);
+
+ int nChildren = r.nextInt(children);
+ ArrayList<TreePath> ret = new ArrayList<>();
+ for (int i = 0; i < nChildren; ++i) {
+ ret.add(new TreePath(genFileStatus(p, r), p.getId(), walk));
+ }
+ return ret;
+ }
+
+ FileStatus genFileStatus(TreePath parent, Random r) {
+ final int blocksize = 128 * (1 << 20);
+ final Path name;
+ final boolean isDir;
+ if (null == parent) {
+ name = root;
+ isDir = true;
+ } else {
+ Path p = parent.getFileStatus().getPath();
+ name = new Path(p, genName(r, 3, 10));
+ isDir = r.nextFloat() < depth;
+ }
+ final long len = isDir ? 0 : r.nextInt(Integer.MAX_VALUE);
+ final int nblocks = 0 == len ? 0 : (((int)((len - 1) / blocksize)) + 1);
+ BlockLocation[] blocks = genBlocks(r, nblocks, blocksize, len);
+ try {
+ return new LocatedFileStatus(new FileStatus(
+ len, /* long length, */
+ isDir, /* boolean isdir, */
+ 1, /* int block_replication, */
+ blocksize, /* long blocksize, */
+ 0L, /* long modification_time, */
+ 0L, /* long access_time, */
+ null, /* FsPermission permission, */
+ "hadoop", /* String owner, */
+ "hadoop", /* String group, */
+ name), /* Path path */
+ blocks);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ BlockLocation[] genBlocks(Random r, int nblocks, int blocksize, long len) {
+ BlockLocation[] blocks = new BlockLocation[nblocks];
+ if (0 == nblocks) {
+ return blocks;
+ }
+ for (int i = 0; i < nblocks - 1; ++i) {
+ blocks[i] = new BlockLocation(null, null, i * blocksize, blocksize);
+ }
+ blocks[nblocks - 1] = new BlockLocation(null, null,
+ (nblocks - 1) * blocksize,
+ 0 == (len % blocksize) ? blocksize : len % blocksize);
+ return blocks;
+ }
+
+ static String genName(Random r, int min, int max) {
+ int len = r.nextInt(max - min + 1) + min;
+ char[] ret = new char[len];
+ while (len > 0) {
+ int c = r.nextInt() & 0x7F; // restrict to ASCII
+ if (Character.isLetterOrDigit(c)) {
+ ret[--len] = (char) c;
+ }
+ }
+ return new String(ret);
+ }
+
+ class RandomTreeIterator extends TreeIterator {
+
+ RandomTreeIterator() {
+ }
+
+ RandomTreeIterator(long seed) {
+ Random r = new Random(seed);
+ FileStatus iroot = genFileStatus(null, r);
+ getPendingQueue().addFirst(new TreePath(iroot, -1, this));
+ }
+
+ RandomTreeIterator(TreePath p) {
+ getPendingQueue().addFirst(
+ new TreePath(p.getFileStatus(), p.getParentId(), this));
+ }
+
+ @Override
+ public TreeIterator fork() {
+ if (getPendingQueue().isEmpty()) {
+ return new RandomTreeIterator();
+ }
+ return new RandomTreeIterator(getPendingQueue().removeFirst());
+ }
+
+ }
+
+}
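Because each child seed is derived from the parent seed and the generated FileStatus (see getChildren() above), a walk is fully determined by its constructor arguments. A minimal same-package usage sketch, assuming the TreeWalk/TreePath API introduced earlier in this patch; as in the tests below, each visited path is accept()ed to assign it an id:

    // Hypothetical usage sketch: re-running this loop with the same seed,
    // fan-out, and depth produces the same sequence of paths.
    long seed = 8675309L;                        // any fixed seed
    RandomTreeWalk walk = new RandomTreeWalk(seed, 10, 0.15f);
    int id = 0;
    for (TreePath p : walk) {
      p.accept(id++);                            // assign an id, as the tests do
      System.out.println(p.getFileStatus().getPath());
    }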
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java
new file mode 100644
index 0000000000..8b52ffd8fa
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import static org.junit.Assert.*;
+
+/**
+ * Validate fixed-size block partitioning.
+ */
+public class TestFixedBlockResolver {
+
+ @Rule public TestName name = new TestName();
+
+ private final FixedBlockResolver blockId = new FixedBlockResolver();
+
+ @Before
+ public void setup() {
+ Configuration conf = new Configuration(false);
+ conf.setLong(FixedBlockResolver.BLOCKSIZE, 512L * (1L << 20));
+ conf.setLong(FixedBlockResolver.START_BLOCK, 512L * (1L << 20));
+ blockId.setConf(conf);
+ System.out.println(name.getMethodName());
+ }
+
+ @Test
+ public void testExactBlock() throws Exception {
+ FileStatus f = file(512, 256);
+ int nblocks = 0;
+ for (BlockProto b : blockId.resolve(f)) {
+ ++nblocks;
+ assertEquals(512L * (1L << 20), b.getNumBytes());
+ }
+ assertEquals(1, nblocks);
+
+ FileStatus g = file(1024, 256);
+ nblocks = 0;
+ for (BlockProto b : blockId.resolve(g)) {
+ ++nblocks;
+ assertEquals(512L * (1L << 20), b.getNumBytes());
+ }
+ assertEquals(2, nblocks);
+
+ FileStatus h = file(5120, 256);
+ nblocks = 0;
+ for (BlockProto b : blockId.resolve(h)) {
+ ++nblocks;
+ assertEquals(512L * (1L << 20), b.getNumBytes());
+ }
+ assertEquals(10, nblocks);
+ }
+
+ @Test
+ public void testEmpty() throws Exception {
+ FileStatus f = file(0, 100);
+ Iterator<BlockProto> b = blockId.resolve(f).iterator();
+ assertTrue(b.hasNext());
+ assertEquals(0, b.next().getNumBytes());
+ assertFalse(b.hasNext());
+ }
+
+ @Test
+ public void testRandomFile() throws Exception {
+ Random r = new Random();
+ long seed = r.nextLong();
+ System.out.println("seed: " + seed);
+ r.setSeed(seed);
+
+ int len = r.nextInt(4096) + 512;
+ int blk = r.nextInt(len - 128) + 128;
+ FileStatus s = file(len, blk);
+ long nbytes = 0;
+ for (BlockProto b : blockId.resolve(s)) {
+ nbytes += b.getNumBytes();
+ assertTrue(512L * (1L << 20) >= b.getNumBytes());
+ }
+ assertEquals(s.getLen(), nbytes);
+ }
+
+ FileStatus file(long lenMB, long blocksizeMB) {
+ Path p = new Path("foo://bar:4344/baz/dingo");
+ return new FileStatus(
+ lenMB * (1 << 20), /* long length, */
+ false, /* boolean isdir, */
+ 1, /* int block_replication, */
+ blocksizeMB * (1 << 20), /* long blocksize, */
+ 0L, /* long modification_time, */
+ 0L, /* long access_time, */
+ null, /* FsPermission permission, */
+ "hadoop", /* String owner, */
+ "hadoop", /* String group, */
+ p); /* Path path */
+ }
+
+}
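The counts asserted above follow from ceiling division over the configured 512 MB block size: a 512 MB file resolves to one block, 1024 MB to two, 5120 MB to ten, and an empty file to a single zero-length block. A small arithmetic sketch of that relationship (illustration only, not FixedBlockResolver itself):

    // Block-count arithmetic behind the assertions in this test.
    long blocksize = 512L * (1L << 20);                              // as in setup()
    long len = 5120L * (1L << 20);                                   // 5120 MB file
    long nblocks = len == 0 ? 1 : (len + blocksize - 1) / blocksize; // 10 blocks
    long lastLen = len == 0 ? 0 : len - (nblocks - 1) * blocksize;   // 512 MB here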
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java
new file mode 100644
index 0000000000..b8e6ac9ac6
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import static org.junit.Assert.*;
+
+/**
+ * Validate randomly generated hierarchies, including fork() support in
+ * the base class.
+ */
+public class TestRandomTreeWalk {
+
+ @Rule public TestName name = new TestName();
+
+ private Random r = new Random();
+
+ @Before
+ public void setSeed() {
+ long seed = r.nextLong();
+ r.setSeed(seed);
+ System.out.println(name.getMethodName() + " seed: " + seed);
+ }
+
+ @Test
+ public void testRandomTreeWalkRepeat() throws Exception {
+ Set<TreePath> ns = new HashSet<>();
+ final long seed = r.nextLong();
+ RandomTreeWalk t1 = new RandomTreeWalk(seed, 10, .1f);
+ int i = 0;
+ for (TreePath p : t1) {
+ p.accept(i++);
+ assertTrue(ns.add(p));
+ }
+
+ RandomTreeWalk t2 = new RandomTreeWalk(seed, 10, .1f);
+ int j = 0;
+ for (TreePath p : t2) {
+ p.accept(j++);
+ assertTrue(ns.remove(p));
+ }
+ assertTrue(ns.isEmpty());
+ }
+
+ @Test
+ public void testRandomTreeWalkFork() throws Exception {
+ Set<FileStatus> ns = new HashSet<>();
+
+ final long seed = r.nextLong();
+ RandomTreeWalk t1 = new RandomTreeWalk(seed, 10, .15f);
+ int i = 0;
+ for (TreePath p : t1) {
+ p.accept(i++);
+ assertTrue(ns.add(p.getFileStatus()));
+ }
+
+ RandomTreeWalk t2 = new RandomTreeWalk(seed, 10, .15f);
+ int j = 0;
+ ArrayList<TreeWalk.TreeIterator> iters = new ArrayList<>();
+ iters.add(t2.iterator());
+ while (!iters.isEmpty()) {
+ for (TreeWalk.TreeIterator sub = iters.remove(iters.size() - 1);
+ sub.hasNext();) {
+ TreePath p = sub.next();
+ if (0 == (r.nextInt() % 4)) {
+ iters.add(sub.fork());
+ Collections.shuffle(iters, r);
+ }
+ p.accept(j++);
+ assertTrue(ns.remove(p.getFileStatus()));
+ }
+ }
+ assertTrue(ns.isEmpty());
+ }
+
+ @Test
+ public void testRandomRootWalk() throws Exception {
+ Set<FileStatus> ns = new HashSet<>();
+ final long seed = r.nextLong();
+ Path root = new Path("foo://bar:4344/dingos");
+ String sroot = root.toString();
+ int nroot = sroot.length();
+ RandomTreeWalk t1 = new RandomTreeWalk(root, seed, 10, .1f);
+ int i = 0;
+ for (TreePath p : t1) {
+ p.accept(i++);
+ FileStatus stat = p.getFileStatus();
+ assertTrue(ns.add(stat));
+ assertEquals(sroot, stat.getPath().toString().substring(0, nroot));
+ }
+
+ RandomTreeWalk t2 = new RandomTreeWalk(root, seed, 10, .1f);
+ int j = 0;
+ for (TreePath p : t2) {
+ p.accept(j++);
+ FileStatus stat = p.getFileStatus();
+ assertTrue(ns.remove(stat));
+ assertEquals(sroot, stat.getPath().toString().substring(0, nroot));
+ }
+ assertTrue(ns.isEmpty());
+ }
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSingleUGIResolver.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSingleUGIResolver.java
new file mode 100644
index 0000000000..9aef106371
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSingleUGIResolver.java
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import static org.junit.Assert.*;
+
+/**
+ * Validate resolver assigning all paths to a single owner/group.
+ */
+public class TestSingleUGIResolver {
+
+ @Rule public TestName name = new TestName();
+
+ private static final int TESTUID = 10101;
+ private static final int TESTGID = 10102;
+ private static final String TESTUSER = "tenaqvyybdhragqvatbf";
+ private static final String TESTGROUP = "tnyybcvatlnxf";
+
+ private SingleUGIResolver ugi = new SingleUGIResolver();
+
+ @Before
+ public void setup() {
+ Configuration conf = new Configuration(false);
+ conf.setInt(SingleUGIResolver.UID, TESTUID);
+ conf.setInt(SingleUGIResolver.GID, TESTGID);
+ conf.set(SingleUGIResolver.USER, TESTUSER);
+ conf.set(SingleUGIResolver.GROUP, TESTGROUP);
+ ugi.setConf(conf);
+ System.out.println(name.getMethodName());
+ }
+
+ @Test
+ public void testRewrite() {
+ FsPermission p1 = new FsPermission((short)0755);
+ match(ugi.resolve(file("dingo", "dingo", p1)), p1);
+ match(ugi.resolve(file(TESTUSER, "dingo", p1)), p1);
+ match(ugi.resolve(file("dingo", TESTGROUP, p1)), p1);
+ match(ugi.resolve(file(TESTUSER, TESTGROUP, p1)), p1);
+
+ FsPermission p2 = new FsPermission((short)0x8000);
+ match(ugi.resolve(file("dingo", "dingo", p2)), p2);
+ match(ugi.resolve(file(TESTUSER, "dingo", p2)), p2);
+ match(ugi.resolve(file("dingo", TESTGROUP, p2)), p2);
+ match(ugi.resolve(file(TESTUSER, TESTGROUP, p2)), p2);
+
+ Map<Integer, String> ids = ugi.ugiMap();
+ assertEquals(2, ids.size());
+ assertEquals(TESTUSER, ids.get(10101));
+ assertEquals(TESTGROUP, ids.get(10102));
+ }
+
+ @Test
+ public void testDefault() {
+ String user;
+ try {
+ user = UserGroupInformation.getCurrentUser().getShortUserName();
+ } catch (IOException e) {
+ user = "hadoop";
+ }
+ Configuration conf = new Configuration(false);
+ ugi.setConf(conf);
+ Map<Integer, String> ids = ugi.ugiMap();
+ assertEquals(2, ids.size());
+ assertEquals(user, ids.get(0));
+ assertEquals(user, ids.get(1));
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testInvalidUid() {
+ Configuration conf = ugi.getConf();
+ conf.setInt(SingleUGIResolver.UID, (1 << 24) + 1);
+ ugi.setConf(conf);
+ ugi.resolve(file(TESTUSER, TESTGROUP, new FsPermission((short)0777)));
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testInvalidGid() {
+ Configuration conf = ugi.getConf();
+ conf.setInt(SingleUGIResolver.GID, (1 << 24) + 1);
+ ugi.setConf(conf);
+ ugi.resolve(file(TESTUSER, TESTGROUP, new FsPermission((short)0777)));
+ }
+
+ @Test(expected=IllegalStateException.class)
+ public void testDuplicateIds() {
+ Configuration conf = new Configuration(false);
+ conf.setInt(SingleUGIResolver.UID, 4344);
+ conf.setInt(SingleUGIResolver.GID, 4344);
+ conf.set(SingleUGIResolver.USER, TESTUSER);
+ conf.set(SingleUGIResolver.GROUP, TESTGROUP);
+ ugi.setConf(conf);
+ ugi.ugiMap();
+ }
+
+ static void match(long encoded, FsPermission p) {
+ assertEquals(p, new FsPermission((short)(encoded & 0xFFFF)));
+ long uid = (encoded >>> UGIResolver.USER_STRID_OFFSET);
+ uid &= UGIResolver.USER_GROUP_STRID_MASK;
+ assertEquals(TESTUID, uid);
+ long gid = (encoded >>> UGIResolver.GROUP_STRID_OFFSET);
+ gid &= UGIResolver.USER_GROUP_STRID_MASK;
+ assertEquals(TESTGID, gid);
+ }
+
+ static FileStatus file(String user, String group, FsPermission perm) {
+ Path p = new Path("foo://bar:4344/baz/dingo");
+ return new FileStatus(
+ 4344 * (1 << 20), /* long length, */
+ false, /* boolean isdir, */
+ 1, /* int block_replication, */
+ 256 * (1 << 20), /* long blocksize, */
+ 0L, /* long modification_time, */
+ 0L, /* long access_time, */
+ perm, /* FsPermission permission, */
+ user, /* String owner, */
+ group, /* String group, */
+ p); /* Path path */
+ }
+
+}
diff --git a/hadoop-tools/hadoop-fs2img/src/test/resources/log4j.properties b/hadoop-tools/hadoop-fs2img/src/test/resources/log4j.properties
new file mode 100644
index 0000000000..2ebf29e2b6
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/test/resources/log4j.properties
@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# log4j configuration used during build and unit tests
+
+log4j.rootLogger=INFO,stdout
+log4j.threshold=ALL
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n
+
diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml
index 28faa9f0ee..4b903614c7 100644
--- a/hadoop-tools/hadoop-tools-dist/pom.xml
+++ b/hadoop-tools/hadoop-tools-dist/pom.xml
@@ -128,6 +128,12 @@
       <scope>compile</scope>
       <version>${project.version}</version>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-fs2img</artifactId>
+      <scope>compile</scope>
+      <version>${project.version}</version>
+    </dependency>
diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml
index 6f95f117ef..c0300459d8 100644
--- a/hadoop-tools/pom.xml
+++ b/hadoop-tools/pom.xml
@@ -48,6 +48,7 @@
     <module>hadoop-kafka</module>
     <module>hadoop-azure-datalake</module>
     <module>hadoop-aliyun</module>
+    <module>hadoop-fs2img</module>