HADOOP-11021. Configurable replication factor in the hadoop archive command. Contributed by Zhe Zhang.
This commit is contained in:
parent
c60da4d3b3
commit
ea1c6f31c2
@ -38,7 +38,7 @@ Overview
|
||||
How to Create an Archive
|
||||
------------------------
|
||||
|
||||
`Usage: hadoop archive -archiveName name -p <parent> <src>* <dest>`
|
||||
`Usage: hadoop archive -archiveName name -p <parent> [-r <replication factor>] <src>* <dest>`
|
||||
|
||||
-archiveName is the name of the archive you would like to create. An example
|
||||
would be foo.har. The name should have a \*.har extension. The parent argument
|
||||
@ -52,9 +52,12 @@ How to Create an Archive
|
||||
would need a map reduce cluster to run this. For a detailed example the later
|
||||
sections.
|
||||
|
||||
-r indicates the desired replication factor; if this optional argument is
|
||||
not specified, a replication factor of 10 will be used.
|
||||
|
||||
If you just want to archive a single directory /foo/bar then you can just use
|
||||
|
||||
`hadoop archive -archiveName zoo.har -p /foo/bar /outputdir`
|
||||
`hadoop archive -archiveName zoo.har -p /foo/bar -r 3 /outputdir`
|
||||
|
||||
How to Look Up Files in Archives
|
||||
--------------------------------
|
||||
@ -90,14 +93,15 @@ Archives Examples
|
||||
|
||||
$H3 Creating an Archive
|
||||
|
||||
`hadoop archive -archiveName foo.har -p /user/hadoop dir1 dir2 /user/zoo`
|
||||
`hadoop archive -archiveName foo.har -p /user/hadoop -r 3 dir1 dir2 /user/zoo`
|
||||
|
||||
The above example is creating an archive using /user/hadoop as the relative
|
||||
archive directory. The directories /user/hadoop/dir1 and /user/hadoop/dir2
|
||||
will be archived in the following file system directory -- /user/zoo/foo.har.
|
||||
Archiving does not delete the input files. If you want to delete the input
|
||||
files after creating the archives (to reduce namespace), you will have to do
|
||||
it on your own.
|
||||
it on your own. In this example, because `-r 3` is specified, a replication
|
||||
factor of 3 will be used.
|
||||
|
||||
$H3 Looking Up Files
|
||||
|
||||
|
@ -97,9 +97,12 @@ public class HadoopArchives implements Tool {
|
||||
long partSize = 2 * 1024 * 1024 * 1024l;
|
||||
/** size of blocks in hadoop archives **/
|
||||
long blockSize = 512 * 1024 * 1024l;
|
||||
/** the desired replication degree; default is 10 **/
|
||||
short repl = 10;
|
||||
|
||||
private static final String usage = "archive"
|
||||
+ " -archiveName NAME -p <parent path> <src>* <dest>" +
|
||||
+ " -archiveName NAME -p <parent path> [-r <replication factor>]" +
|
||||
"<src>* <dest>" +
|
||||
"\n";
|
||||
|
||||
|
||||
@ -542,7 +545,7 @@ void archive(Path parentPath, List<Path> srcPaths,
|
||||
srcWriter.close();
|
||||
}
|
||||
//increase the replication of src files
|
||||
jobfs.setReplication(srcFiles, (short) 10);
|
||||
jobfs.setReplication(srcFiles, repl);
|
||||
conf.setInt(SRC_COUNT_LABEL, numFiles);
|
||||
conf.setLong(TOTAL_SIZE_LABEL, totalSize);
|
||||
int numMaps = (int)(totalSize/partSize);
|
||||
@ -835,6 +838,11 @@ public int run(String[] args) throws Exception {
|
||||
}
|
||||
|
||||
i+=2;
|
||||
|
||||
if ("-r".equals(args[i])) {
|
||||
repl = Short.parseShort(args[i+1]);
|
||||
i+=2;
|
||||
}
|
||||
//read the rest of the paths
|
||||
for (; i < args.length; i++) {
|
||||
if (i == (args.length - 1)) {
|
||||
|
@ -157,6 +157,24 @@ public void testRelativePath() throws Exception {
|
||||
final List<String> harPaths = lsr(shell, fullHarPathStr);
|
||||
Assert.assertEquals(originalPaths, harPaths);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRelativePathWitRepl() throws Exception {
|
||||
final Path sub1 = new Path(inputPath, "dir1");
|
||||
fs.mkdirs(sub1);
|
||||
createFile(inputPath, fs, sub1.getName(), "a");
|
||||
final FsShell shell = new FsShell(conf);
|
||||
|
||||
final List<String> originalPaths = lsr(shell, "input");
|
||||
System.out.println("originalPaths: " + originalPaths);
|
||||
|
||||
// make the archive:
|
||||
final String fullHarPathStr = makeArchiveWithRepl();
|
||||
|
||||
// compare results:
|
||||
final List<String> harPaths = lsr(shell, fullHarPathStr);
|
||||
Assert.assertEquals(originalPaths, harPaths);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPathWithSpaces() throws Exception {
|
||||
@ -625,6 +643,29 @@ private String makeArchive() throws Exception {
|
||||
assertEquals(0, ToolRunner.run(har, args));
|
||||
return fullHarPathStr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Run the HadoopArchives tool to create an archive on the
|
||||
* given file system with a specified replication degree.
|
||||
*/
|
||||
private String makeArchiveWithRepl() throws Exception {
|
||||
final String inputPathStr = inputPath.toUri().getPath();
|
||||
System.out.println("inputPathStr = " + inputPathStr);
|
||||
|
||||
final URI uri = fs.getUri();
|
||||
final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
|
||||
+ archivePath.toUri().getPath() + Path.SEPARATOR;
|
||||
|
||||
final String harName = "foo.har";
|
||||
final String fullHarPathStr = prefix + harName;
|
||||
final String[] args = { "-archiveName", harName, "-p", inputPathStr,
|
||||
"-r 3", "*", archivePath.toString() };
|
||||
System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
|
||||
HADOOP_ARCHIVES_JAR);
|
||||
final HadoopArchives har = new HadoopArchives(conf);
|
||||
assertEquals(0, ToolRunner.run(har, args));
|
||||
return fullHarPathStr;
|
||||
}
|
||||
|
||||
@Test
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user