MAPREDUCE-7018. Apply erasure coding properly to framework tarball and support plain tar (miklos.szegedi@cloudera.com via rkanter)
commit 2316f52690
parent 00129c5314
@@ -81,7 +81,6 @@ public class FrameworkUploader implements Runnable {
 
   @VisibleForTesting
   OutputStream targetStream = null;
-  private Path targetPath = null;
   private String alias = null;
 
   private void printHelp(Options options) {
@@ -140,11 +139,12 @@ void collectPackages() throws UploaderException {
     }
   }
 
-  private void beginUpload() throws IOException, UploaderException {
+  @VisibleForTesting
+  void beginUpload() throws IOException, UploaderException {
     if (targetStream == null) {
       validateTargetPath();
       int lastIndex = target.indexOf('#');
-      targetPath =
+      Path targetPath =
           new Path(
               target.substring(
                   0, lastIndex == -1 ? target.length() : lastIndex));
@@ -153,8 +153,38 @@ private void beginUpload() throws IOException, UploaderException {
           targetPath.getName();
       LOG.info("Target " + targetPath);
       FileSystem fileSystem = targetPath.getFileSystem(new Configuration());
-      targetStream = fileSystem.create(targetPath, true);
+
+      targetStream = null;
+      if (fileSystem instanceof DistributedFileSystem) {
+        LOG.info("Set replication to " +
+            replication + " for path: " + targetPath);
+        LOG.info("Disabling Erasure Coding for path: " + targetPath);
+        DistributedFileSystem dfs = (DistributedFileSystem)fileSystem;
+        DistributedFileSystem.HdfsDataOutputStreamBuilder builder =
+            dfs.createFile(targetPath)
+                .overwrite(true)
+                .ecPolicyName(
+                    SystemErasureCodingPolicies.getReplicationPolicy().getName());
+        if (replication > 0) {
+          builder.replication(replication);
+        }
+        targetStream = builder.build();
+      } else {
+        LOG.warn("Cannot set replication to " +
+            replication + " for path: " + targetPath +
+            " on a non-distributed fileystem " +
+            fileSystem.getClass().getName());
+      }
+      if (targetStream == null) {
+        targetStream = fileSystem.create(targetPath, true);
+      }
+
+      if (targetPath.getName().endsWith("gz") ||
+          targetPath.getName().endsWith("tgz")) {
+        LOG.info("Creating GZip");
+        targetStream = new GZIPOutputStream(targetStream);
+      }
     }
   }
 
   @VisibleForTesting
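The hunk above is the heart of the change: instead of creating the tarball and fixing its storage layout afterwards, beginUpload() now requests the layout at creation time through the HdfsDataOutputStreamBuilder API. A minimal standalone sketch of that pattern follows; the class and method names (ReplicatedTargetSketch, openReplicated) are illustrative only, not part of the patch.

    // Sketch only: open an HDFS file that is forced onto the built-in
    // replication policy (i.e. not erasure coded) from its first block,
    // optionally with an explicit replication factor.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;

    import java.io.IOException;
    import java.io.OutputStream;

    class ReplicatedTargetSketch {
      static OutputStream openReplicated(Path path, short replication)
          throws IOException {
        FileSystem fs = path.getFileSystem(new Configuration());
        if (fs instanceof DistributedFileSystem) {
          DistributedFileSystem dfs = (DistributedFileSystem) fs;
          DistributedFileSystem.HdfsDataOutputStreamBuilder builder =
              dfs.createFile(path)
                  .overwrite(true)
                  // Pin the file to the replication "policy" so it does not
                  // inherit an erasure coding policy from its parent directory.
                  .ecPolicyName(
                      SystemErasureCodingPolicies.getReplicationPolicy().getName());
          if (replication > 0) {
            builder.replication(replication);
          }
          return builder.build();
        }
        // Local and other file systems: plain create, nothing else to set.
        return fs.create(path, true);
      }
    }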
@@ -162,7 +192,7 @@ void buildPackage() throws IOException, UploaderException {
     beginUpload();
     LOG.info("Compressing tarball");
     try (TarArchiveOutputStream out = new TarArchiveOutputStream(
-        new GZIPOutputStream(targetStream))) {
+        targetStream)) {
       for (String fullPath : filteredInputFiles) {
         LOG.info("Adding " + fullPath);
         File file = new File(fullPath);
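After this hunk, buildPackage() writes a plain tar to whatever stream beginUpload() handed it; gzip is layered on only when the target name ends in gz or tgz, which is what enables an uncompressed .tar framework target. A rough sketch of that split, using the same commons-compress and Hadoop IO classes the patch relies on (the helper names maybeGzip and writeTar are made up for illustration):

    // Illustrative sketch: compression is decided by the target's extension,
    // the tar writer itself never compresses.
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
    import org.apache.hadoop.io.IOUtils;

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.OutputStream;
    import java.util.zip.GZIPOutputStream;

    class TarTargetSketch {
      static OutputStream maybeGzip(String targetName, OutputStream raw)
          throws IOException {
        // Same extension rule as beginUpload(): .gz and .tgz imply compression.
        if (targetName.endsWith("gz") || targetName.endsWith("tgz")) {
          return new GZIPOutputStream(raw);
        }
        return raw;
      }

      static void writeTar(OutputStream target, Iterable<String> files)
          throws IOException {
        try (TarArchiveOutputStream out = new TarArchiveOutputStream(target)) {
          for (String path : files) {
            File file = new File(path);
            TarArchiveEntry entry = new TarArchiveEntry(file, file.getName());
            out.putArchiveEntry(entry);
            try (FileInputStream in = new FileInputStream(file)) {
              IOUtils.copyBytes(in, out, 1024 * 1024);
            }
            out.closeArchiveEntry();
          }
        }
      }
    }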
@@ -178,25 +208,6 @@ void buildPackage() throws IOException, UploaderException {
         targetStream.close();
       }
     }
-
-    if (targetPath == null) {
-      return;
-    }
-
-    // Set file attributes
-    FileSystem fileSystem = targetPath.getFileSystem(new Configuration());
-    if (fileSystem instanceof DistributedFileSystem) {
-      LOG.info("Disabling Erasure Coding for path: " + targetPath);
-      DistributedFileSystem dfs = (DistributedFileSystem) fileSystem;
-      dfs.setErasureCodingPolicy(targetPath,
-          SystemErasureCodingPolicies.getReplicationPolicy().getName());
-    }
-
-    if (replication > 0) {
-      LOG.info("Set replication to " +
-          replication + " for path: " + targetPath);
-      fileSystem.setReplication(targetPath, replication);
-    }
   }
 
   private void parseLists() throws UploaderException {
@@ -30,6 +30,7 @@
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.PrintStream;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -171,23 +172,30 @@ public void testCollectPackages() throws IOException, UploaderException {
    */
   @Test
   public void testBuildTarBall() throws IOException, UploaderException {
+    String[] testFiles = {"upload.tar", "upload.tar.gz"};
+    for (String testFile: testFiles) {
       File parent = new File(testDir);
       try {
         parent.deleteOnExit();
         FrameworkUploader uploader = prepareTree(parent);
 
-        File gzipFile = new File("upload.tar.gz");
+        File gzipFile =
+            new File(parent.getAbsolutePath() + "/" + testFile);
         gzipFile.deleteOnExit();
-        Assert.assertTrue("Creating output", gzipFile.createNewFile());
-        uploader.targetStream = new FileOutputStream(gzipFile);
 
+        uploader.target =
+            "file:///" + gzipFile.getAbsolutePath();
+        uploader.beginUpload();
         uploader.buildPackage();
+        InputStream stream = new FileInputStream(gzipFile);
+        if (gzipFile.getName().endsWith(".gz")) {
+          stream = new GZIPInputStream(stream);
+        }
 
         TarArchiveInputStream result = null;
         try {
           result =
-              new TarArchiveInputStream(
-                  new GZIPInputStream(new FileInputStream(gzipFile)));
+              new TarArchiveInputStream(stream);
           Set<String> fileNames = new HashSet<>();
           Set<Long> sizes = new HashSet<>();
           TarArchiveEntry entry1 = result.getNextTarEntry();
@@ -213,6 +221,7 @@ public void testBuildTarBall() throws IOException, UploaderException {
         FileUtils.deleteDirectory(parent);
       }
     }
+  }
 
   /**
    * Test upload to HDFS.
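The updated test exercises both target flavours, upload.tar and upload.tar.gz, and wraps the read side in a GZIPInputStream only when the file name indicates compression. The same read-back pattern in isolation (class and method names here are illustrative):

    // Illustrative read-back: decompress only when the name says .gz,
    // then walk the tar entries.
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.zip.GZIPInputStream;

    class ReadBackSketch {
      static void listEntries(File archive) throws IOException {
        InputStream stream = new FileInputStream(archive);
        if (archive.getName().endsWith(".gz")) {
          stream = new GZIPInputStream(stream);
        }
        try (TarArchiveInputStream tar = new TarArchiveInputStream(stream)) {
          for (TarArchiveEntry entry = tar.getNextTarEntry();
               entry != null;
               entry = tar.getNextTarEntry()) {
            System.out.println(entry.getName() + " " + entry.getSize());
          }
        }
      }
    }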