From 8da3a6e314609f9124bd9979cd09cddbc2a10d36 Mon Sep 17 00:00:00 2001 From: Virajith Jalaparti Date: Sat, 15 Apr 2017 12:15:08 -0700 Subject: [PATCH] HDFS-10706. [READ] Add tool generating FSImage from external store --- hadoop-tools/hadoop-fs2img/pom.xml | 87 +++ .../hdfs/server/namenode/BlockResolver.java | 95 +++ .../hdfs/server/namenode/FSTreeWalk.java | 105 +++ .../hdfs/server/namenode/FileSystemImage.java | 139 ++++ .../FixedBlockMultiReplicaResolver.java | 44 ++ .../server/namenode/FixedBlockResolver.java | 93 +++ .../hdfs/server/namenode/FsUGIResolver.java | 58 ++ .../hdfs/server/namenode/ImageWriter.java | 600 ++++++++++++++++++ .../hdfs/server/namenode/NullBlockFormat.java | 87 +++ .../server/namenode/SingleUGIResolver.java | 90 +++ .../hadoop/hdfs/server/namenode/TreePath.java | 167 +++++ .../hadoop/hdfs/server/namenode/TreeWalk.java | 103 +++ .../hdfs/server/namenode/UGIResolver.java | 131 ++++ .../hdfs/server/namenode/package-info.java | 23 + .../hdfs/server/namenode/RandomTreeWalk.java | 186 ++++++ .../namenode/TestFixedBlockResolver.java | 121 ++++ .../server/namenode/TestRandomTreeWalk.java | 130 ++++ .../namenode/TestSingleUGIResolver.java | 148 +++++ .../src/test/resources/log4j.properties | 24 + hadoop-tools/hadoop-tools-dist/pom.xml | 6 + hadoop-tools/pom.xml | 1 + 21 files changed, 2438 insertions(+) create mode 100644 hadoop-tools/hadoop-fs2img/pom.xml create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java create mode 100644 hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java create mode 100644 hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java create mode 100644 hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java create mode 100644 hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java create mode 100644 hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSingleUGIResolver.java create mode 100644 hadoop-tools/hadoop-fs2img/src/test/resources/log4j.properties diff --git 
a/hadoop-tools/hadoop-fs2img/pom.xml b/hadoop-tools/hadoop-fs2img/pom.xml new file mode 100644 index 0000000000..36096b7d9f --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/pom.xml @@ -0,0 +1,87 @@ + + + + 4.0.0 + + org.apache.hadoop + hadoop-project + 3.0.0-alpha3-SNAPSHOT + ../../hadoop-project + + org.apache.hadoop + hadoop-fs2img + 3.0.0-alpha3-SNAPSHOT + fs2img + fs2img + jar + + + ${project.build.directory}/log + + + + + org.apache.hadoop + hadoop-common + provided + + + org.apache.hadoop + hadoop-hdfs + provided + + + org.apache.hadoop + hadoop-minicluster + provided + + + com.google.protobuf + protobuf-java + provided + + + commons-cli + commons-cli + + + junit + junit + test + + + org.mockito + mockito-all + test + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.hadoop.hdfs.server.namenode.FileSystemImage + + + + + + + + diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java new file mode 100644 index 0000000000..94b92b833f --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; + +/** + * Given an external reference, create a sequence of blocks and associated + * metadata. + */ +public abstract class BlockResolver { + + protected BlockProto buildBlock(long blockId, long bytes) { + return buildBlock(blockId, bytes, 1001); + } + + protected BlockProto buildBlock(long blockId, long bytes, long genstamp) { + BlockProto.Builder b = BlockProto.newBuilder() + .setBlockId(blockId) + .setNumBytes(bytes) + .setGenStamp(genstamp); + return b.build(); + } + + /** + * @param s the external reference. + * @return sequence of blocks that make up the reference. + */ + public Iterable resolve(FileStatus s) { + List lengths = blockLengths(s); + ArrayList ret = new ArrayList<>(lengths.size()); + long tot = 0; + for (long l : lengths) { + tot += l; + ret.add(buildBlock(nextId(), l)); + } + if (tot != s.getLen()) { + // log a warning? + throw new IllegalStateException( + "Expected " + s.getLen() + " found " + tot); + } + return ret; + } + + /** + * @return the next block id. + */ + public abstract long nextId(); + + /** + * @return the maximum sequentially allocated block ID for this filesystem. + */ + protected abstract long lastId(); + + /** + * @param status the external reference. 
+ * @return the lengths of the resultant blocks. + */ + protected abstract List blockLengths(FileStatus status); + + + /** + * @param status the external reference. + * @return the block size to assign to this external reference. + */ + public long preferredBlockSize(FileStatus status) { + return status.getBlockSize(); + } + + /** + * @param status the external reference. + * @return the replication to assign to this external reference. + */ + public abstract int getReplication(FileStatus status); + +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java new file mode 100644 index 0000000000..f73611244d --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.ConcurrentModificationException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * Traversal of an external FileSystem. 
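For context, a minimal sketch of how such a walk is typically consumed; this mirrors the driver loop in FileSystemImage later in this patch, with a hypothetical local root and output directory:

    Configuration conf = new Configuration();
    ImageWriter.Options opts =
        ReflectionUtils.newInstance(ImageWriter.Options.class, conf);
    opts.output("file:///tmp/name");              // hypothetical output directory
    try (ImageWriter w = new ImageWriter(opts)) {
      for (TreePath e : new FSTreeWalk(new Path("file:///data"), conf)) {
        w.accept(e);  // assigning an inode id here is what enqueues a directory's children
      }
    }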
+ */ +public class FSTreeWalk extends TreeWalk { + + private final Path root; + private final FileSystem fs; + + public FSTreeWalk(Path root, Configuration conf) throws IOException { + this.root = root; + fs = root.getFileSystem(conf); + } + + @Override + protected Iterable getChildren(TreePath path, long id, + TreeIterator i) { + // TODO symlinks + if (!path.getFileStatus().isDirectory()) { + return Collections.emptyList(); + } + try { + ArrayList ret = new ArrayList<>(); + for (FileStatus s : fs.listStatus(path.getFileStatus().getPath())) { + ret.add(new TreePath(s, id, i)); + } + return ret; + } catch (FileNotFoundException e) { + throw new ConcurrentModificationException("FS modified"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + class FSTreeIterator extends TreeIterator { + + private FSTreeIterator() { + } + + FSTreeIterator(TreePath p) { + getPendingQueue().addFirst( + new TreePath(p.getFileStatus(), p.getParentId(), this)); + } + + FSTreeIterator(Path p) throws IOException { + try { + FileStatus s = fs.getFileStatus(root); + getPendingQueue().addFirst(new TreePath(s, -1L, this)); + } catch (FileNotFoundException e) { + if (p.equals(root)) { + throw e; + } + throw new ConcurrentModificationException("FS modified"); + } + } + + @Override + public TreeIterator fork() { + if (getPendingQueue().isEmpty()) { + return new FSTreeIterator(); + } + return new FSTreeIterator(getPendingQueue().removeFirst()); + } + + } + + @Override + public TreeIterator iterator() { + try { + return new FSTreeIterator(root); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java new file mode 100644 index 0000000000..e1e85c1a64 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.File; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.server.common.BlockFormat; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +/** + * Create FSImage from an external namespace. + */ +public class FileSystemImage implements Tool { + + private Configuration conf; + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + // require absolute URI to write anywhere but local + FileSystem.setDefaultUri(conf, new File(".").toURI().toString()); + } + + protected void printUsage() { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("fs2img [OPTIONS] URI", new Options()); + formatter.setSyntaxPrefix(""); + formatter.printHelp("Options", options()); + ToolRunner.printGenericCommandUsage(System.out); + } + + static Options options() { + Options options = new Options(); + options.addOption("o", "outdir", true, "Output directory"); + options.addOption("u", "ugiclass", true, "UGI resolver class"); + options.addOption("b", "blockclass", true, "Block output class"); + options.addOption("i", "blockidclass", true, "Block resolver class"); + options.addOption("c", "cachedirs", true, "Max active dirents"); + options.addOption("h", "help", false, "Print usage"); + return options; + } + + @Override + public int run(String[] argv) throws Exception { + Options options = options(); + CommandLineParser parser = new PosixParser(); + CommandLine cmd; + try { + cmd = parser.parse(options, argv); + } catch (ParseException e) { + System.out.println( + "Error parsing command-line options: " + e.getMessage()); + printUsage(); + return -1; + } + + if (cmd.hasOption("h")) { + printUsage(); + return -1; + } + + ImageWriter.Options opts = + ReflectionUtils.newInstance(ImageWriter.Options.class, getConf()); + for (Option o : cmd.getOptions()) { + switch (o.getOpt()) { + case "o": + opts.output(o.getValue()); + break; + case "u": + opts.ugi(Class.forName(o.getValue()).asSubclass(UGIResolver.class)); + break; + case "b": + opts.blocks( + Class.forName(o.getValue()).asSubclass(BlockFormat.class)); + break; + case "i": + opts.blockIds( + Class.forName(o.getValue()).asSubclass(BlockResolver.class)); + break; + case "c": + opts.cache(Integer.parseInt(o.getValue())); + break; + default: + throw new UnsupportedOperationException("Internal error"); + } + } + + String[] rem = cmd.getArgs(); + if (rem.length != 1) { + printUsage(); + return -1; + } + + try (ImageWriter w = new ImageWriter(opts)) { + for (TreePath e : new FSTreeWalk(new Path(rem[0]), getConf())) { + w.accept(e); // add and continue + } + } + return 0; + } + + public static void main(String[] argv) throws Exception { + int ret = ToolRunner.run(new FileSystemImage(), argv); + System.exit(ret); + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java 
b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java new file mode 100644 index 0000000000..0c8ce6e8df --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; + +/** + * Resolver mapping all files to a configurable, uniform blocksize + * and replication. + */ +public class FixedBlockMultiReplicaResolver extends FixedBlockResolver { + + public static final String REPLICATION = + "hdfs.image.writer.resolver.fixed.block.replication"; + + private int replication; + + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + replication = conf.getInt(REPLICATION, 1); + } + + public int getReplication(FileStatus s) { + return replication; + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java new file mode 100644 index 0000000000..8ff9695998 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; + +/** + * Resolver mapping all files to a configurable, uniform blocksize. 
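To make the splitting concrete, a rough sketch (file size, block size, and path are hypothetical): a 300 MB file at a 128 MB fixed block size resolves to blocks of 128 MB, 128 MB, and 44 MB, each with replication 1:

    Configuration conf = new Configuration();
    conf.setLong(FixedBlockResolver.BLOCKSIZE, 128L << 20);
    FixedBlockResolver resolver = new FixedBlockResolver();
    resolver.setConf(conf);
    FileStatus stat = new FileStatus(300L << 20, false, 1, 128L << 20, 0L,
        new Path("file:///data/part-00000"));     // hypothetical source file
    for (BlockProto block : resolver.resolve(stat)) {
      System.out.println(block.getBlockId() + " -> " + block.getNumBytes());
    }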
+ */ +public class FixedBlockResolver extends BlockResolver implements Configurable { + + public static final String BLOCKSIZE = + "hdfs.image.writer.resolver.fixed.block.size"; + public static final String START_BLOCK = + "hdfs.image.writer.resolver.fixed.block.start"; + + private Configuration conf; + private long blocksize = 256 * (1L << 20); + private final AtomicLong blockIds = new AtomicLong(0); + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + blocksize = conf.getLong(BLOCKSIZE, 256 * (1L << 20)); + blockIds.set(conf.getLong(START_BLOCK, (1L << 30))); + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + protected List blockLengths(FileStatus s) { + ArrayList ret = new ArrayList<>(); + if (!s.isFile()) { + return ret; + } + if (0 == s.getLen()) { + // the file has length 0; so we will have one block of size 0 + ret.add(0L); + return ret; + } + int nblocks = (int)((s.getLen() - 1) / blocksize) + 1; + for (int i = 0; i < nblocks - 1; ++i) { + ret.add(blocksize); + } + long rem = s.getLen() % blocksize; + ret.add(0 == (rem % blocksize) ? blocksize : rem); + return ret; + } + + @Override + public long nextId() { + return blockIds.incrementAndGet(); + } + + @Override + public long lastId() { + return blockIds.get(); + } + + @Override + public long preferredBlockSize(FileStatus s) { + return blocksize; + } + + @Override + public int getReplication(FileStatus s) { + return 1; + } +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java new file mode 100644 index 0000000000..ca16d962fe --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.HashSet; +import java.util.Set; + +/** + * Dynamically assign ids to users/groups as they appear in the external + * filesystem. 
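Concretely (names hypothetical; this assumes caller code in the same package, since the constructor is package-private), ids are handed out from a single counter in order of first appearance, shared across users and groups:

    FsUGIResolver ugi = new FsUGIResolver();
    ugi.addUser("alice");     // alice -> 0
    ugi.addGroup("staff");    // staff -> 1
    ugi.addUser("bob");       // bob   -> 2
    ugi.addUser("alice");     // already known, no new id assigned
    Map<Integer, String> table = ugi.ugiMap();   // {0=alice, 1=staff, 2=bob}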
+ */ +public class FsUGIResolver extends UGIResolver { + + private int id; + private final Set usernames; + private final Set groupnames; + + FsUGIResolver() { + super(); + id = 0; + usernames = new HashSet(); + groupnames = new HashSet(); + } + + @Override + public synchronized void addUser(String name) { + if (!usernames.contains(name)) { + addUser(name, id); + id++; + usernames.add(name); + } + } + + @Override + public synchronized void addGroup(String name) { + if (!groupnames.contains(name)) { + addGroup(name, id); + id++; + groupnames.add(name); + } + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java new file mode 100644 index 0000000000..a3603a118e --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java @@ -0,0 +1,600 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.BufferedOutputStream; +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FilterOutputStream; +import java.io.OutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.atomic.AtomicLong; + +import com.google.common.base.Charsets; +import com.google.protobuf.CodedOutputStream; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.server.common.BlockFormat; +import org.apache.hadoop.hdfs.server.common.FileRegion; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection.DirEntry; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection; +import 
org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection; +import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressorStream; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; + +import static org.apache.hadoop.hdfs.server.namenode.FSImageUtil.MAGIC_HEADER; + +/** + * Utility crawling an existing hierarchical FileSystem and emitting + * a valid FSImage/NN storage. + */ +// TODO: generalize to types beyond FileRegion +public class ImageWriter implements Closeable { + + private static final int ONDISK_VERSION = 1; + private static final int LAYOUT_VERSION = -64; // see NameNodeLayoutVersion + + private final Path outdir; + private final FileSystem outfs; + private final File dirsTmp; + private final OutputStream dirs; + private final File inodesTmp; + private final OutputStream inodes; + private final MessageDigest digest; + private final FSImageCompression compress; + private final long startBlock; + private final long startInode; + private final UGIResolver ugis; + private final BlockFormat.Writer blocks; + private final BlockResolver blockIds; + private final Map dircache; + private final TrackedOutputStream raw; + + private boolean closed = false; + private long curSec; + private long curBlock; + private final AtomicLong curInode; + private final FileSummary.Builder summary = FileSummary.newBuilder() + .setOndiskVersion(ONDISK_VERSION) + .setLayoutVersion(LAYOUT_VERSION); + + private final String blockPoolID; + + public static Options defaults() { + return new Options(); + } + + @SuppressWarnings("unchecked") + public ImageWriter(Options opts) throws IOException { + final OutputStream out; + if (null == opts.outStream) { + FileSystem fs = opts.outdir.getFileSystem(opts.getConf()); + outfs = (fs instanceof LocalFileSystem) + ? ((LocalFileSystem)fs).getRaw() + : fs; + Path tmp = opts.outdir; + if (!outfs.mkdirs(tmp)) { + throw new IOException("Failed to create output dir: " + tmp); + } + try (NNStorage stor = new NNStorage(opts.getConf(), + Arrays.asList(tmp.toUri()), Arrays.asList(tmp.toUri()))) { + NamespaceInfo info = NNStorage.newNamespaceInfo(); + if (info.getLayoutVersion() != LAYOUT_VERSION) { + throw new IllegalStateException("Incompatible layout " + + info.getLayoutVersion() + " (expected " + LAYOUT_VERSION); + } + stor.format(info); + blockPoolID = info.getBlockPoolID(); + } + outdir = new Path(tmp, "current"); + out = outfs.create(new Path(outdir, "fsimage_0000000000000000000")); + } else { + // XXX necessary? writing a NNStorage now... + outdir = null; + outfs = null; + out = opts.outStream; + blockPoolID = ""; + } + digest = MD5Hash.getDigester(); + raw = new TrackedOutputStream<>(new DigestOutputStream( + new BufferedOutputStream(out), digest)); + compress = opts.compress; + CompressionCodec codec = compress.getImageCodec(); + if (codec != null) { + summary.setCodec(codec.getClass().getCanonicalName()); + } + startBlock = opts.startBlock; + curBlock = startBlock; + startInode = opts.startInode; + curInode = new AtomicLong(startInode); + dircache = Collections.synchronizedMap(new DirEntryCache(opts.maxdircache)); + + ugis = null == opts.ugis + ? 
ReflectionUtils.newInstance(opts.ugisClass, opts.getConf()) + : opts.ugis; + BlockFormat fmt = null == opts.blocks + ? ReflectionUtils.newInstance(opts.blockFormatClass, opts.getConf()) + : opts.blocks; + blocks = fmt.getWriter(null); + blockIds = null == opts.blockIds + ? ReflectionUtils.newInstance(opts.blockIdsClass, opts.getConf()) + : opts.blockIds; + + // create directory and inode sections as side-files. + // The details are written to files to avoid keeping them in memory. + dirsTmp = File.createTempFile("fsimg_dir", null); + dirsTmp.deleteOnExit(); + dirs = beginSection(new FileOutputStream(dirsTmp)); + try { + inodesTmp = File.createTempFile("fsimg_inode", null); + inodesTmp.deleteOnExit(); + inodes = new FileOutputStream(inodesTmp); + } catch (IOException e) { + // appropriate to close raw? + IOUtils.cleanup(null, raw, dirs); + throw e; + } + + raw.write(MAGIC_HEADER); + curSec = raw.pos; + assert raw.pos == MAGIC_HEADER.length; + } + + public void accept(TreePath e) throws IOException { + assert e.getParentId() < curInode.get(); + // allocate ID + long id = curInode.getAndIncrement(); + e.accept(id); + assert e.getId() < curInode.get(); + INode n = e.toINode(ugis, blockIds, blocks, blockPoolID); + writeInode(n); + + if (e.getParentId() > 0) { + // add DirEntry to map, which may page out entries + DirEntry.Builder de = DirEntry.newBuilder() + .setParent(e.getParentId()) + .addChildren(e.getId()); + dircache.put(e.getParentId(), de); + } + } + + @SuppressWarnings("serial") + class DirEntryCache extends LinkedHashMap { + + // should cache path to root, not evict LRCached + private final int nEntries; + + DirEntryCache(int nEntries) { + this.nEntries = nEntries; + } + + @Override + public DirEntry.Builder put(Long p, DirEntry.Builder b) { + DirEntry.Builder e = get(p); + if (null == e) { + return super.put(p, b); + } + //merge + e.addAllChildren(b.getChildrenList()); + // not strictly conforming + return e; + } + + @Override + protected boolean removeEldestEntry(Entry be) { + if (size() > nEntries) { + DirEntry d = be.getValue().build(); + try { + writeDirEntry(d); + } catch (IOException e) { + throw new RuntimeException(e); + } + return true; + } + return false; + } + } + + synchronized void writeInode(INode n) throws IOException { + n.writeDelimitedTo(inodes); + } + + synchronized void writeDirEntry(DirEntry e) throws IOException { + e.writeDelimitedTo(dirs); + } + + // from FSImageFormatProtobuf... why not just read position from the stream? 
+ private static int getOndiskSize(com.google.protobuf.GeneratedMessage s) { + return CodedOutputStream.computeRawVarint32Size(s.getSerializedSize()) + + s.getSerializedSize(); + } + + @Override + public synchronized void close() throws IOException { + if (closed) { + return; + } + for (DirEntry.Builder b : dircache.values()) { + DirEntry e = b.build(); + writeDirEntry(e); + } + dircache.clear(); + + // close side files + IOUtils.cleanup(null, dirs, inodes, blocks); + if (null == dirs || null == inodes) { + // init failed + if (raw != null) { + raw.close(); + } + return; + } + try { + writeNameSystemSection(); + writeINodeSection(); + writeDirSection(); + writeStringTableSection(); + + // write summary directly to raw + FileSummary s = summary.build(); + s.writeDelimitedTo(raw); + int length = getOndiskSize(s); + byte[] lengthBytes = new byte[4]; + ByteBuffer.wrap(lengthBytes).asIntBuffer().put(length); + raw.write(lengthBytes); + } finally { + raw.close(); + } + writeMD5("fsimage_0000000000000000000"); + closed = true; + } + + /** + * Write checksum for image file. Pulled from MD5Utils/internals. Awkward to + * reuse existing tools/utils. + */ + void writeMD5(String imagename) throws IOException { + if (null == outdir) { + //LOG.warn("Not writing MD5"); + return; + } + MD5Hash md5 = new MD5Hash(digest.digest()); + String digestString = StringUtils.byteToHexString(md5.getDigest()); + Path chk = new Path(outdir, imagename + ".md5"); + try (OutputStream out = outfs.create(chk)) { + String md5Line = digestString + " *" + imagename + "\n"; + out.write(md5Line.getBytes(Charsets.UTF_8)); + } + } + + OutputStream beginSection(OutputStream out) throws IOException { + CompressionCodec codec = compress.getImageCodec(); + if (null == codec) { + return out; + } + return codec.createOutputStream(out); + } + + void endSection(OutputStream out, SectionName name) throws IOException { + CompressionCodec codec = compress.getImageCodec(); + if (codec != null) { + ((CompressorStream)out).finish(); + } + out.flush(); + long length = raw.pos - curSec; + summary.addSections(FileSummary.Section.newBuilder() + .setName(name.toString()) // not strictly correct, but name not visible + .setOffset(curSec).setLength(length)); + curSec += length; + } + + void writeNameSystemSection() throws IOException { + NameSystemSection.Builder b = NameSystemSection.newBuilder() + .setGenstampV1(1000) + .setGenstampV1Limit(0) + .setGenstampV2(1001) + .setLastAllocatedBlockId(blockIds.lastId()) + .setTransactionId(0); + NameSystemSection s = b.build(); + + OutputStream sec = beginSection(raw); + s.writeDelimitedTo(sec); + endSection(sec, SectionName.NS_INFO); + } + + void writeINodeSection() throws IOException { + // could reset dict to avoid compression cost in close + INodeSection.Builder b = INodeSection.newBuilder() + .setNumInodes(curInode.get() - startInode) + .setLastInodeId(curInode.get()); + INodeSection s = b.build(); + + OutputStream sec = beginSection(raw); + s.writeDelimitedTo(sec); + // copy inodes + try (FileInputStream in = new FileInputStream(inodesTmp)) { + IOUtils.copyBytes(in, sec, 4096, false); + } + endSection(sec, SectionName.INODE); + } + + void writeDirSection() throws IOException { + // No header, so dirs can be written/compressed independently + //INodeDirectorySection.Builder b = INodeDirectorySection.newBuilder(); + OutputStream sec = raw; + // copy dirs + try (FileInputStream in = new FileInputStream(dirsTmp)) { + IOUtils.copyBytes(in, sec, 4096, false); + } + endSection(sec, SectionName.INODE_DIR); + } 
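  // Note on the resulting image layout (as produced by the constructor and close() above):
  // the stream begins with MAGIC_HEADER, followed by the NS_INFO, INODE, INODE_DIR and
  // STRING_TABLE sections, each recorded in the FileSummary with its offset and length;
  // the FileSummary itself is written at the end, followed by its 4-byte on-disk size.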
+ + void writeFilesUCSection() throws IOException { + FilesUnderConstructionSection.Builder b = + FilesUnderConstructionSection.newBuilder(); + FilesUnderConstructionSection s = b.build(); + + OutputStream sec = beginSection(raw); + s.writeDelimitedTo(sec); + endSection(sec, SectionName.FILES_UNDERCONSTRUCTION); + } + + void writeSnapshotDiffSection() throws IOException { + SnapshotDiffSection.Builder b = SnapshotDiffSection.newBuilder(); + SnapshotDiffSection s = b.build(); + + OutputStream sec = beginSection(raw); + s.writeDelimitedTo(sec); + endSection(sec, SectionName.SNAPSHOT_DIFF); + } + + void writeSecretManagerSection() throws IOException { + SecretManagerSection.Builder b = SecretManagerSection.newBuilder() + .setCurrentId(0) + .setTokenSequenceNumber(0); + SecretManagerSection s = b.build(); + + OutputStream sec = beginSection(raw); + s.writeDelimitedTo(sec); + endSection(sec, SectionName.SECRET_MANAGER); + } + + void writeCacheManagerSection() throws IOException { + CacheManagerSection.Builder b = CacheManagerSection.newBuilder() + .setNumPools(0) + .setNumDirectives(0) + .setNextDirectiveId(1); + CacheManagerSection s = b.build(); + + OutputStream sec = beginSection(raw); + s.writeDelimitedTo(sec); + endSection(sec, SectionName.CACHE_MANAGER); + } + + void writeStringTableSection() throws IOException { + StringTableSection.Builder b = StringTableSection.newBuilder(); + Map u = ugis.ugiMap(); + b.setNumEntry(u.size()); + StringTableSection s = b.build(); + + OutputStream sec = beginSection(raw); + s.writeDelimitedTo(sec); + for (Map.Entry e : u.entrySet()) { + StringTableSection.Entry.Builder x = + StringTableSection.Entry.newBuilder() + .setId(e.getKey()) + .setStr(e.getValue()); + x.build().writeDelimitedTo(sec); + } + endSection(sec, SectionName.STRING_TABLE); + } + + @Override + public synchronized String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("{ codec=\"").append(compress.getImageCodec()); + sb.append("\", startBlock=").append(startBlock); + sb.append(", curBlock=").append(curBlock); + sb.append(", startInode=").append(startInode); + sb.append(", curInode=").append(curInode); + sb.append(", ugi=").append(ugis); + sb.append(", blockIds=").append(blockIds); + sb.append(", offset=").append(raw.pos); + sb.append(" }"); + return sb.toString(); + } + + static class TrackedOutputStream + extends FilterOutputStream { + + private long pos = 0L; + + TrackedOutputStream(T out) { + super(out); + } + + @SuppressWarnings("unchecked") + public T getInner() { + return (T) out; + } + + @Override + public void write(int b) throws IOException { + out.write(b); + ++pos; + } + + @Override + public void write(byte[] b) throws IOException { + write(b, 0, b.length); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + pos += len; + } + + @Override + public void flush() throws IOException { + super.flush(); + } + + @Override + public void close() throws IOException { + super.close(); + } + + } + + /** + * Configurable options for image generation mapping pluggable components. 
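A sketch of populating these options purely through configuration keys (key values and the output path are hypothetical); ReflectionUtils.newInstance invokes setConf(), which reads the defaults below:

    Configuration conf = new Configuration();
    conf.setInt(ImageWriter.Options.CACHE_ENTRY, 1000);
    conf.setClass(ImageWriter.Options.UGI_CLASS,
        FsUGIResolver.class, UGIResolver.class);
    conf.setLong(FixedBlockResolver.BLOCKSIZE, 128L << 20);
    ImageWriter.Options opts =
        ReflectionUtils.newInstance(ImageWriter.Options.class, conf);
    opts.output("file:///tmp/name");             // hypothetical image output directory

FileSystemImage.run() builds the same object from the fs2img command line (fs2img [OPTIONS] URI) and then overrides individual fields from the parsed options.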
+ */ + public static class Options implements Configurable { + + public static final String START_INODE = "hdfs.image.writer.start.inode"; + public static final String CACHE_ENTRY = "hdfs.image.writer.cache.entries"; + public static final String UGI_CLASS = "hdfs.image.writer.ugi.class"; + public static final String BLOCK_RESOLVER_CLASS = + "hdfs.image.writer.blockresolver.class"; + + private Path outdir; + private Configuration conf; + private OutputStream outStream; + private int maxdircache; + private long startBlock; + private long startInode; + private UGIResolver ugis; + private Class ugisClass; + private BlockFormat blocks; + + @SuppressWarnings("rawtypes") + private Class blockFormatClass; + private BlockResolver blockIds; + private Class blockIdsClass; + private FSImageCompression compress = + FSImageCompression.createNoopCompression(); + + protected Options() { + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + //long lastTxn = conf.getLong(LAST_TXN, 0L); + String def = new File("hdfs/name").toURI().toString(); + outdir = new Path(conf.get(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, def)); + startBlock = conf.getLong(FixedBlockResolver.START_BLOCK, (1L << 30) + 1); + startInode = conf.getLong(START_INODE, (1L << 14) + 1); + maxdircache = conf.getInt(CACHE_ENTRY, 100); + ugisClass = conf.getClass(UGI_CLASS, + SingleUGIResolver.class, UGIResolver.class); + blockFormatClass = conf.getClass( + DFSConfigKeys.DFS_PROVIDER_BLK_FORMAT_CLASS, + NullBlockFormat.class, BlockFormat.class); + blockIdsClass = conf.getClass(BLOCK_RESOLVER_CLASS, + FixedBlockResolver.class, BlockResolver.class); + } + + @Override + public Configuration getConf() { + return conf; + } + + public Options output(String out) { + this.outdir = new Path(out); + return this; + } + + public Options outStream(OutputStream outStream) { + this.outStream = outStream; + return this; + } + + public Options codec(String codec) throws IOException { + this.compress = FSImageCompression.createCompression(getConf(), codec); + return this; + } + + public Options cache(int nDirEntries) { + this.maxdircache = nDirEntries; + return this; + } + + public Options ugi(UGIResolver ugis) { + this.ugis = ugis; + return this; + } + + public Options ugi(Class ugisClass) { + this.ugisClass = ugisClass; + return this; + } + + public Options blockIds(BlockResolver blockIds) { + this.blockIds = blockIds; + return this; + } + + public Options blockIds(Class blockIdsClass) { + this.blockIdsClass = blockIdsClass; + return this; + } + + public Options blocks(BlockFormat blocks) { + this.blocks = blocks; + return this; + } + + @SuppressWarnings("rawtypes") + public Options blocks(Class blocksClass) { + this.blockFormatClass = blocksClass; + return this; + } + + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java new file mode 100644 index 0000000000..aabdf74820 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.server.common.BlockFormat; +import org.apache.hadoop.hdfs.server.common.BlockFormat.Reader.Options; +import org.apache.hadoop.hdfs.server.common.FileRegion; + +/** + * Null sink for region information emitted from FSImage. + */ +public class NullBlockFormat extends BlockFormat { + + @Override + public Reader getReader(Options opts) throws IOException { + return new Reader() { + @Override + public Iterator iterator() { + return new Iterator() { + @Override + public boolean hasNext() { + return false; + } + @Override + public FileRegion next() { + throw new NoSuchElementException(); + } + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public void close() throws IOException { + // do nothing + } + + @Override + public FileRegion resolve(Block ident) throws IOException { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public Writer getWriter(Writer.Options opts) throws IOException { + return new Writer() { + @Override + public void store(FileRegion token) throws IOException { + // do nothing + } + + @Override + public void close() throws IOException { + // do nothing + } + }; + } + + @Override + public void refresh() throws IOException { + // do nothing + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java new file mode 100644 index 0000000000..0fd3f2b138 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.security.UserGroupInformation; + +/** + * Map all owners/groups in external system to a single user in FSImage. + */ +public class SingleUGIResolver extends UGIResolver implements Configurable { + + public static final String UID = "hdfs.image.writer.ugi.single.uid"; + public static final String USER = "hdfs.image.writer.ugi.single.user"; + public static final String GID = "hdfs.image.writer.ugi.single.gid"; + public static final String GROUP = "hdfs.image.writer.ugi.single.group"; + + private int uid; + private int gid; + private String user; + private String group; + private Configuration conf; + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + uid = conf.getInt(UID, 0); + user = conf.get(USER); + if (null == user) { + try { + user = UserGroupInformation.getCurrentUser().getShortUserName(); + } catch (IOException e) { + user = "hadoop"; + } + } + gid = conf.getInt(GID, 1); + group = conf.get(GROUP); + if (null == group) { + group = user; + } + + resetUGInfo(); + addUser(user, uid); + addGroup(group, gid); + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public String user(FileStatus s) { + return user; + } + + @Override + public String group(FileStatus s) { + return group; + } + + @Override + public void addUser(String name) { + //do nothing + } + + @Override + public void addGroup(String name) { + //do nothing + } +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java new file mode 100644 index 0000000000..14e6bed07f --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import com.google.protobuf.ByteString; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; +import org.apache.hadoop.hdfs.server.common.BlockFormat; +import org.apache.hadoop.hdfs.server.common.FileRegion; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile; +import static org.apache.hadoop.hdfs.DFSUtil.string2Bytes; +import static org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA; +import static org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature.DEFAULT_STORAGE_SPACE_QUOTA; + +/** + * Traversal cursor in external filesystem. + * TODO: generalize, move FS/FileRegion to FSTreePath + */ +public class TreePath { + private long id = -1; + private final long parentId; + private final FileStatus stat; + private final TreeWalk.TreeIterator i; + + protected TreePath(FileStatus stat, long parentId, TreeWalk.TreeIterator i) { + this.i = i; + this.stat = stat; + this.parentId = parentId; + } + + public FileStatus getFileStatus() { + return stat; + } + + public long getParentId() { + return parentId; + } + + public long getId() { + if (id < 0) { + throw new IllegalStateException(); + } + return id; + } + + void accept(long id) { + this.id = id; + i.onAccept(this, id); + } + + public INode toINode(UGIResolver ugi, BlockResolver blk, + BlockFormat.Writer out, String blockPoolID) + throws IOException { + if (stat.isFile()) { + return toFile(ugi, blk, out, blockPoolID); + } else if (stat.isDirectory()) { + return toDirectory(ugi); + } else if (stat.isSymlink()) { + throw new UnsupportedOperationException("symlinks not supported"); + } else { + throw new UnsupportedOperationException("Unknown type: " + stat); + } + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof TreePath)) { + return false; + } + TreePath o = (TreePath) other; + return getParentId() == o.getParentId() + && getFileStatus().equals(o.getFileStatus()); + } + + @Override + public int hashCode() { + long pId = getParentId() * getFileStatus().hashCode(); + return (int)(pId ^ (pId >>> 32)); + } + + void writeBlock(long blockId, long offset, long length, + long genStamp, String blockPoolID, + BlockFormat.Writer out) throws IOException { + FileStatus s = getFileStatus(); + out.store(new FileRegion(blockId, s.getPath(), offset, length, + blockPoolID, genStamp)); + } + + INode toFile(UGIResolver ugi, BlockResolver blk, + BlockFormat.Writer out, String blockPoolID) + throws IOException { + final FileStatus s = getFileStatus(); + // TODO should this store resolver's user/group? 
+ ugi.addUser(s.getOwner()); + ugi.addGroup(s.getGroup()); + INodeFile.Builder b = INodeFile.newBuilder() + .setReplication(blk.getReplication(s)) + .setModificationTime(s.getModificationTime()) + .setAccessTime(s.getAccessTime()) + .setPreferredBlockSize(blk.preferredBlockSize(s)) + .setPermission(ugi.resolve(s)) + .setStoragePolicyID(HdfsConstants.PROVIDED_STORAGE_POLICY_ID); + //TODO: storage policy should be configurable per path; use BlockResolver + long off = 0L; + for (BlockProto block : blk.resolve(s)) { + b.addBlocks(block); + writeBlock(block.getBlockId(), off, block.getNumBytes(), + block.getGenStamp(), blockPoolID, out); + off += block.getNumBytes(); + } + INode.Builder ib = INode.newBuilder() + .setType(INode.Type.FILE) + .setId(id) + .setName(ByteString.copyFrom(string2Bytes(s.getPath().getName()))) + .setFile(b); + return ib.build(); + } + + INode toDirectory(UGIResolver ugi) { + final FileStatus s = getFileStatus(); + ugi.addUser(s.getOwner()); + ugi.addGroup(s.getGroup()); + INodeDirectory.Builder b = INodeDirectory.newBuilder() + .setModificationTime(s.getModificationTime()) + .setNsQuota(DEFAULT_NAMESPACE_QUOTA) + .setDsQuota(DEFAULT_STORAGE_SPACE_QUOTA) + .setPermission(ugi.resolve(s)); + INode.Builder ib = INode.newBuilder() + .setType(INode.Type.DIRECTORY) + .setId(id) + .setName(ByteString.copyFrom(string2Bytes(s.getPath().getName()))) + .setDirectory(b); + return ib.build(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("{ stat=\"").append(getFileStatus()).append("\""); + sb.append(", id=").append(getId()); + sb.append(", parentId=").append(getParentId()); + sb.append(", iterObjId=").append(System.identityHashCode(i)); + sb.append(" }"); + return sb.toString(); + } +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java new file mode 100644 index 0000000000..7fd26f9e50 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Iterator; + +/** + * Traversal yielding a hierarchical sequence of paths. + */ +public abstract class TreeWalk implements Iterable { + + /** + * @param path path to the node being explored. + * @param id the id of the node. + * @param iterator the {@link TreeIterator} to use. + * @return paths representing the children of the current node. 
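Children are produced lazily: a TreeIterator only enqueues them when a path is accepted, so a walk iterated without accepting yields nothing beyond the root. A small sketch (path and inode id are hypothetical):

    TreeWalk.TreeIterator it =
        new FSTreeWalk(new Path("file:///data"), conf).iterator();
    TreePath root = it.next();
    root.accept(1L << 14);        // onAccept() pushes the root's children onto the deque
    while (it.hasNext()) {
      TreePath child = it.next(); // each child must be accepted in turn to descend further
    }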
+ */ + protected abstract Iterable getChildren( + TreePath path, long id, TreeWalk.TreeIterator iterator); + + public abstract TreeIterator iterator(); + + /** + * Enumerator class for hierarchies. Implementations SHOULD support a fork() + * operation yielding a subtree of the current cursor. + */ + public abstract class TreeIterator implements Iterator { + + private final Deque pending; + + TreeIterator() { + this(new ArrayDeque()); + } + + protected TreeIterator(Deque pending) { + this.pending = pending; + } + + public abstract TreeIterator fork(); + + @Override + public boolean hasNext() { + return !pending.isEmpty(); + } + + @Override + public TreePath next() { + return pending.removeFirst(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + protected void onAccept(TreePath p, long id) { + for (TreePath k : getChildren(p, id, this)) { + pending.addFirst(k); + } + } + + /** + * @return the Deque containing the pending paths. + */ + protected Deque getPendingQueue() { + return pending; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("{ Treewalk=\"").append(TreeWalk.this.toString()); + sb.append(", pending=["); + Iterator i = pending.iterator(); + if (i.hasNext()) { + sb.append("\"").append(i.next()).append("\""); + } + while (i.hasNext()) { + sb.append(", \"").append(i.next()).append("\""); + } + sb.append("]"); + sb.append(" }"); + return sb.toString(); + } + } +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java new file mode 100644 index 0000000000..2d50668b68 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.permission.FsPermission; + +/** + * Pluggable class for mapping ownership and permissions from an external + * store to an FSImage. + */ +public abstract class UGIResolver { + + static final int USER_STRID_OFFSET = 40; + static final int GROUP_STRID_OFFSET = 16; + static final long USER_GROUP_STRID_MASK = (1 << 24) - 1; + + /** + * Permission is serialized as a 64-bit long. [0:24):[25:48):[48:64) (in Big + * Endian). + * The first and the second parts are the string ids of the user and + * group name, and the last 16 bits are the permission bits. 
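As a worked example (the string ids are hypothetical): a file whose owner resolved to string id 2 and group to string id 3, with mode 0644, encodes to

    (2L << 40) | (3L << 16) | 0644   // user bits | group bits | permission bits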
+ * @param owner name of owner + * @param group name of group + * @param permission Permission octects + * @return FSImage encoding of permissions + */ + protected final long buildPermissionStatus( + String owner, String group, short permission) { + + long userId = users.get(owner); + if (0L != ((~USER_GROUP_STRID_MASK) & userId)) { + throw new IllegalArgumentException("UID must fit in 24 bits"); + } + + long groupId = groups.get(group); + if (0L != ((~USER_GROUP_STRID_MASK) & groupId)) { + throw new IllegalArgumentException("GID must fit in 24 bits"); + } + return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET) + | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET) + | permission; + } + + private final Map users; + private final Map groups; + + public UGIResolver() { + this(new HashMap(), new HashMap()); + } + + UGIResolver(Map users, Map groups) { + this.users = users; + this.groups = groups; + } + + public Map ugiMap() { + Map ret = new HashMap<>(); + for (Map m : Arrays.asList(users, groups)) { + for (Map.Entry e : m.entrySet()) { + String s = ret.put(e.getValue(), e.getKey()); + if (s != null) { + throw new IllegalStateException("Duplicate mapping: " + + e.getValue() + " " + s + " " + e.getKey()); + } + } + } + return ret; + } + + public abstract void addUser(String name); + + protected void addUser(String name, int id) { + Integer uid = users.put(name, id); + if (uid != null) { + throw new IllegalArgumentException("Duplicate mapping: " + name + + " " + uid + " " + id); + } + } + + public abstract void addGroup(String name); + + protected void addGroup(String name, int id) { + Integer gid = groups.put(name, id); + if (gid != null) { + throw new IllegalArgumentException("Duplicate mapping: " + name + + " " + gid + " " + id); + } + } + + protected void resetUGInfo() { + users.clear(); + groups.clear(); + } + + public long resolve(FileStatus s) { + return buildPermissionStatus(user(s), group(s), permission(s).toShort()); + } + + public String user(FileStatus s) { + return s.getOwner(); + } + + public String group(FileStatus s) { + return s.getGroup(); + } + + public FsPermission permission(FileStatus s) { + return s.getPermission(); + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java new file mode 100644 index 0000000000..956292e4ea --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +package org.apache.hadoop.hdfs.server.namenode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java new file mode 100644 index 0000000000..c82c489463 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; + +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; + +/** + * Random, repeatable hierarchy generator. 
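+ * <p>A sketch of the intended use (illustrative; the constructors are
+ * package-private, as exercised by TestRandomTreeWalk):
+ * <pre>{@code
+ *   RandomTreeWalk walk = new RandomTreeWalk(4344L, 10, 0.1f);
+ *   for (TreePath p : walk) {
+ *     // the same seed yields the same sequence of FileStatus entries
+ *   }
+ * }</pre>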
+ */
+public class RandomTreeWalk extends TreeWalk {
+
+  private final Path root;
+  private final long seed;
+  private final float depth;
+  private final int children;
+  private final Map<Long, Long> mSeed;
+
+  RandomTreeWalk(long seed) {
+    this(seed, 10);
+  }
+
+  RandomTreeWalk(long seed, int children) {
+    this(seed, children, 0.15f);
+  }
+
+  RandomTreeWalk(long seed, int children, float depth) {
+    this(randomRoot(seed), seed, children, depth);
+  }
+
+  RandomTreeWalk(Path root, long seed, int children, float depth) {
+    this.seed = seed;
+    this.depth = depth;
+    this.children = children;
+    mSeed = Collections.synchronizedMap(new HashMap<Long, Long>());
+    mSeed.put(-1L, seed);
+    this.root = root;
+  }
+
+  static Path randomRoot(long seed) {
+    Random r = new Random(seed);
+    String scheme;
+    do {
+      scheme = genName(r, 3, 5).toLowerCase();
+    } while (Character.isDigit(scheme.charAt(0)));
+    String authority = genName(r, 3, 15).toLowerCase();
+    int port = r.nextInt(1 << 13) + 1000;
+    return new Path(scheme, authority + ":" + port, "/");
+  }
+
+  @Override
+  public TreeIterator iterator() {
+    return new RandomTreeIterator(seed);
+  }
+
+  @Override
+  protected Iterable<TreePath> getChildren(TreePath p, long id,
+      TreeIterator walk) {
+    final FileStatus pFs = p.getFileStatus();
+    if (pFs.isFile()) {
+      return Collections.emptyList();
+    }
+    // the child seed is a function of the parent seed and the child attributes
+    long cseed = mSeed.get(p.getParentId()) * p.getFileStatus().hashCode();
+    mSeed.put(p.getId(), cseed);
+    Random r = new Random(cseed);
+
+    int nChildren = r.nextInt(children);
+    ArrayList<TreePath> ret = new ArrayList<TreePath>();
+    for (int i = 0; i < nChildren; ++i) {
+      ret.add(new TreePath(genFileStatus(p, r), p.getId(), walk));
+    }
+    return ret;
+  }
+
+  FileStatus genFileStatus(TreePath parent, Random r) {
+    final int blocksize = 128 * (1 << 20);
+    final Path name;
+    final boolean isDir;
+    if (null == parent) {
+      name = root;
+      isDir = true;
+    } else {
+      Path p = parent.getFileStatus().getPath();
+      name = new Path(p, genName(r, 3, 10));
+      isDir = r.nextFloat() < depth;
+    }
+    final long len = isDir ? 0 : r.nextInt(Integer.MAX_VALUE);
+    final int nblocks = 0 == len ? 0 : (((int)((len - 1) / blocksize)) + 1);
+    BlockLocation[] blocks = genBlocks(r, nblocks, blocksize, len);
+    try {
+      return new LocatedFileStatus(new FileStatus(
+          len,       /* long length,             */
+          isDir,     /* boolean isdir,           */
+          1,         /* int block_replication,   */
+          blocksize, /* long blocksize,          */
+          0L,        /* long modification_time,  */
+          0L,        /* long access_time,        */
+          null,      /* FsPermission permission, */
+          "hadoop",  /* String owner,            */
+          "hadoop",  /* String group,            */
+          name),     /* Path path                */
+          blocks);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  BlockLocation[] genBlocks(Random r, int nblocks, int blocksize, long len) {
+    BlockLocation[] blocks = new BlockLocation[nblocks];
+    if (0 == nblocks) {
+      return blocks;
+    }
+    for (int i = 0; i < nblocks - 1; ++i) {
+      blocks[i] = new BlockLocation(null, null, i * blocksize, blocksize);
+    }
+    blocks[nblocks - 1] = new BlockLocation(null, null,
+        (nblocks - 1) * blocksize,
+        0 == (len % blocksize) ?
blocksize : len % blocksize); + return blocks; + } + + static String genName(Random r, int min, int max) { + int len = r.nextInt(max - min + 1) + min; + char[] ret = new char[len]; + while (len > 0) { + int c = r.nextInt() & 0x7F; // restrict to ASCII + if (Character.isLetterOrDigit(c)) { + ret[--len] = (char) c; + } + } + return new String(ret); + } + + class RandomTreeIterator extends TreeIterator { + + RandomTreeIterator() { + } + + RandomTreeIterator(long seed) { + Random r = new Random(seed); + FileStatus iroot = genFileStatus(null, r); + getPendingQueue().addFirst(new TreePath(iroot, -1, this)); + } + + RandomTreeIterator(TreePath p) { + getPendingQueue().addFirst( + new TreePath(p.getFileStatus(), p.getParentId(), this)); + } + + @Override + public TreeIterator fork() { + if (getPendingQueue().isEmpty()) { + return new RandomTreeIterator(); + } + return new RandomTreeIterator(getPendingQueue().removeFirst()); + } + + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java new file mode 100644 index 0000000000..8b52ffd8fa --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.Iterator; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import static org.junit.Assert.*; + +/** + * Validate fixed-size block partitioning. 
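+ * With the 512 MB block size configured in {@link #setup()}, a 1024 MB file
+ * should resolve to two 512 MB blocks, and (illustratively) a 600 MB file
+ * should resolve to a 512 MB block followed by an 88 MB remainder: blocks
+ * never exceed the configured size and always sum to the file length.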
+ */ +public class TestFixedBlockResolver { + + @Rule public TestName name = new TestName(); + + private final FixedBlockResolver blockId = new FixedBlockResolver(); + + @Before + public void setup() { + Configuration conf = new Configuration(false); + conf.setLong(FixedBlockResolver.BLOCKSIZE, 512L * (1L << 20)); + conf.setLong(FixedBlockResolver.START_BLOCK, 512L * (1L << 20)); + blockId.setConf(conf); + System.out.println(name.getMethodName()); + } + + @Test + public void testExactBlock() throws Exception { + FileStatus f = file(512, 256); + int nblocks = 0; + for (BlockProto b : blockId.resolve(f)) { + ++nblocks; + assertEquals(512L * (1L << 20), b.getNumBytes()); + } + assertEquals(1, nblocks); + + FileStatus g = file(1024, 256); + nblocks = 0; + for (BlockProto b : blockId.resolve(g)) { + ++nblocks; + assertEquals(512L * (1L << 20), b.getNumBytes()); + } + assertEquals(2, nblocks); + + FileStatus h = file(5120, 256); + nblocks = 0; + for (BlockProto b : blockId.resolve(h)) { + ++nblocks; + assertEquals(512L * (1L << 20), b.getNumBytes()); + } + assertEquals(10, nblocks); + } + + @Test + public void testEmpty() throws Exception { + FileStatus f = file(0, 100); + Iterator b = blockId.resolve(f).iterator(); + assertTrue(b.hasNext()); + assertEquals(0, b.next().getNumBytes()); + assertFalse(b.hasNext()); + } + + @Test + public void testRandomFile() throws Exception { + Random r = new Random(); + long seed = r.nextLong(); + System.out.println("seed: " + seed); + r.setSeed(seed); + + int len = r.nextInt(4096) + 512; + int blk = r.nextInt(len - 128) + 128; + FileStatus s = file(len, blk); + long nbytes = 0; + for (BlockProto b : blockId.resolve(s)) { + nbytes += b.getNumBytes(); + assertTrue(512L * (1L << 20) >= b.getNumBytes()); + } + assertEquals(s.getLen(), nbytes); + } + + FileStatus file(long lenMB, long blocksizeMB) { + Path p = new Path("foo://bar:4344/baz/dingo"); + return new FileStatus( + lenMB * (1 << 20), /* long length, */ + false, /* boolean isdir, */ + 1, /* int block_replication, */ + blocksizeMB * (1 << 20), /* long blocksize, */ + 0L, /* long modification_time, */ + 0L, /* long access_time, */ + null, /* FsPermission permission, */ + "hadoop", /* String owner, */ + "hadoop", /* String group, */ + p); /* Path path */ + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java new file mode 100644 index 0000000000..b8e6ac9ac6 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import static org.junit.Assert.*; + +/** + * Validate randomly generated hierarchies, including fork() support in + * base class. + */ +public class TestRandomTreeWalk { + + @Rule public TestName name = new TestName(); + + private Random r = new Random(); + + @Before + public void setSeed() { + long seed = r.nextLong(); + r.setSeed(seed); + System.out.println(name.getMethodName() + " seed: " + seed); + } + + @Test + public void testRandomTreeWalkRepeat() throws Exception { + Set ns = new HashSet<>(); + final long seed = r.nextLong(); + RandomTreeWalk t1 = new RandomTreeWalk(seed, 10, .1f); + int i = 0; + for (TreePath p : t1) { + p.accept(i++); + assertTrue(ns.add(p)); + } + + RandomTreeWalk t2 = new RandomTreeWalk(seed, 10, .1f); + int j = 0; + for (TreePath p : t2) { + p.accept(j++); + assertTrue(ns.remove(p)); + } + assertTrue(ns.isEmpty()); + } + + @Test + public void testRandomTreeWalkFork() throws Exception { + Set ns = new HashSet<>(); + + final long seed = r.nextLong(); + RandomTreeWalk t1 = new RandomTreeWalk(seed, 10, .15f); + int i = 0; + for (TreePath p : t1) { + p.accept(i++); + assertTrue(ns.add(p.getFileStatus())); + } + + RandomTreeWalk t2 = new RandomTreeWalk(seed, 10, .15f); + int j = 0; + ArrayList iters = new ArrayList<>(); + iters.add(t2.iterator()); + while (!iters.isEmpty()) { + for (TreeWalk.TreeIterator sub = iters.remove(iters.size() - 1); + sub.hasNext();) { + TreePath p = sub.next(); + if (0 == (r.nextInt() % 4)) { + iters.add(sub.fork()); + Collections.shuffle(iters, r); + } + p.accept(j++); + assertTrue(ns.remove(p.getFileStatus())); + } + } + assertTrue(ns.isEmpty()); + } + + @Test + public void testRandomRootWalk() throws Exception { + Set ns = new HashSet<>(); + final long seed = r.nextLong(); + Path root = new Path("foo://bar:4344/dingos"); + String sroot = root.toString(); + int nroot = sroot.length(); + RandomTreeWalk t1 = new RandomTreeWalk(root, seed, 10, .1f); + int i = 0; + for (TreePath p : t1) { + p.accept(i++); + FileStatus stat = p.getFileStatus(); + assertTrue(ns.add(stat)); + assertEquals(sroot, stat.getPath().toString().substring(0, nroot)); + } + + RandomTreeWalk t2 = new RandomTreeWalk(root, seed, 10, .1f); + int j = 0; + for (TreePath p : t2) { + p.accept(j++); + FileStatus stat = p.getFileStatus(); + assertTrue(ns.remove(stat)); + assertEquals(sroot, stat.getPath().toString().substring(0, nroot)); + } + assertTrue(ns.isEmpty()); + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSingleUGIResolver.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSingleUGIResolver.java new file mode 100644 index 0000000000..9aef106371 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSingleUGIResolver.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.UserGroupInformation; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import static org.junit.Assert.*; + +/** + * Validate resolver assigning all paths to a single owner/group. + */ +public class TestSingleUGIResolver { + + @Rule public TestName name = new TestName(); + + private static final int TESTUID = 10101; + private static final int TESTGID = 10102; + private static final String TESTUSER = "tenaqvyybdhragqvatbf"; + private static final String TESTGROUP = "tnyybcvatlnxf"; + + private SingleUGIResolver ugi = new SingleUGIResolver(); + + @Before + public void setup() { + Configuration conf = new Configuration(false); + conf.setInt(SingleUGIResolver.UID, TESTUID); + conf.setInt(SingleUGIResolver.GID, TESTGID); + conf.set(SingleUGIResolver.USER, TESTUSER); + conf.set(SingleUGIResolver.GROUP, TESTGROUP); + ugi.setConf(conf); + System.out.println(name.getMethodName()); + } + + @Test + public void testRewrite() { + FsPermission p1 = new FsPermission((short)0755); + match(ugi.resolve(file("dingo", "dingo", p1)), p1); + match(ugi.resolve(file(TESTUSER, "dingo", p1)), p1); + match(ugi.resolve(file("dingo", TESTGROUP, p1)), p1); + match(ugi.resolve(file(TESTUSER, TESTGROUP, p1)), p1); + + FsPermission p2 = new FsPermission((short)0x8000); + match(ugi.resolve(file("dingo", "dingo", p2)), p2); + match(ugi.resolve(file(TESTUSER, "dingo", p2)), p2); + match(ugi.resolve(file("dingo", TESTGROUP, p2)), p2); + match(ugi.resolve(file(TESTUSER, TESTGROUP, p2)), p2); + + Map ids = ugi.ugiMap(); + assertEquals(2, ids.size()); + assertEquals(TESTUSER, ids.get(10101)); + assertEquals(TESTGROUP, ids.get(10102)); + } + + @Test + public void testDefault() { + String user; + try { + user = UserGroupInformation.getCurrentUser().getShortUserName(); + } catch (IOException e) { + user = "hadoop"; + } + Configuration conf = new Configuration(false); + ugi.setConf(conf); + Map ids = ugi.ugiMap(); + assertEquals(2, ids.size()); + assertEquals(user, ids.get(0)); + assertEquals(user, ids.get(1)); + } + + @Test(expected=IllegalArgumentException.class) + public void testInvalidUid() { + Configuration conf = ugi.getConf(); + conf.setInt(SingleUGIResolver.UID, (1 << 24) + 1); + ugi.setConf(conf); + ugi.resolve(file(TESTUSER, TESTGROUP, new FsPermission((short)0777))); + } + + @Test(expected=IllegalArgumentException.class) + public void testInvalidGid() { + Configuration conf = ugi.getConf(); + conf.setInt(SingleUGIResolver.GID, (1 << 24) + 1); + ugi.setConf(conf); + ugi.resolve(file(TESTUSER, TESTGROUP, new FsPermission((short)0777))); + } + + 
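+  // ugiMap() inverts the name -> id maps for users and groups into a single
+  // id -> name map, so assigning the same id (4344) to both the user and the
+  // group must raise IllegalStateException.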
@Test(expected=IllegalStateException.class) + public void testDuplicateIds() { + Configuration conf = new Configuration(false); + conf.setInt(SingleUGIResolver.UID, 4344); + conf.setInt(SingleUGIResolver.GID, 4344); + conf.set(SingleUGIResolver.USER, TESTUSER); + conf.set(SingleUGIResolver.GROUP, TESTGROUP); + ugi.setConf(conf); + ugi.ugiMap(); + } + + static void match(long encoded, FsPermission p) { + assertEquals(p, new FsPermission((short)(encoded & 0xFFFF))); + long uid = (encoded >>> UGIResolver.USER_STRID_OFFSET); + uid &= UGIResolver.USER_GROUP_STRID_MASK; + assertEquals(TESTUID, uid); + long gid = (encoded >>> UGIResolver.GROUP_STRID_OFFSET); + gid &= UGIResolver.USER_GROUP_STRID_MASK; + assertEquals(TESTGID, gid); + } + + static FileStatus file(String user, String group, FsPermission perm) { + Path p = new Path("foo://bar:4344/baz/dingo"); + return new FileStatus( + 4344 * (1 << 20), /* long length, */ + false, /* boolean isdir, */ + 1, /* int block_replication, */ + 256 * (1 << 20), /* long blocksize, */ + 0L, /* long modification_time, */ + 0L, /* long access_time, */ + perm, /* FsPermission permission, */ + user, /* String owner, */ + group, /* String group, */ + p); /* Path path */ + } + +} diff --git a/hadoop-tools/hadoop-fs2img/src/test/resources/log4j.properties b/hadoop-tools/hadoop-fs2img/src/test/resources/log4j.properties new file mode 100644 index 0000000000..2ebf29e2b6 --- /dev/null +++ b/hadoop-tools/hadoop-fs2img/src/test/resources/log4j.properties @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# log4j configuration used during build and unit tests + +log4j.rootLogger=INFO,stdout +log4j.threshold=ALL +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n + diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml index 28faa9f0ee..4b903614c7 100644 --- a/hadoop-tools/hadoop-tools-dist/pom.xml +++ b/hadoop-tools/hadoop-tools-dist/pom.xml @@ -128,6 +128,12 @@ compile ${project.version} + + org.apache.hadoop + hadoop-fs2img + compile + ${project.version} + diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml index 6f95f117ef..c0300459d8 100644 --- a/hadoop-tools/pom.xml +++ b/hadoop-tools/pom.xml @@ -48,6 +48,7 @@ hadoop-kafka hadoop-azure-datalake hadoop-aliyun + hadoop-fs2img
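Note: with hadoop-fs2img registered as a module of hadoop-tools and added as a compile-scope dependency of hadoop-tools-dist, the new sources build with the rest of the tools tree; assuming the usual multi-module Maven layout and that the rest of the tree has been built, running "mvn test" from hadoop-tools/hadoop-fs2img should exercise the unit tests added above.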