MAPREDUCE-7027: HadoopArchiveLogs shouldn't delete the original logs if the HAR creation fails. Contributed by Gergely Novák

Author: Xuan Gong, 2018-02-23 14:37:26 -08:00
parent 51088d3233
commit 68ce193efc
2 changed files with 142 additions and 90 deletions
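
Before the diff, a brief sketch of what the fix does: HadoopArchiveLogsRunner now keeps the aggregated logs unless the HAR is both created successfully and non-empty. The snippet below is a minimal, hedged sketch of that guarded flow, condensed from the hunks that follow; the class name HarCreationGuardSketch, the method archiveThenReplace, and its parameter list are illustrative and not part of the patch.

// Condensed sketch of the control flow this patch adds to
// HadoopArchiveLogsRunner#runInternal -- illustrative only, not the exact
// Hadoop source; the method name and parameter list here are made up.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.HadoopArchives;

class HarCreationGuardSketch {

  static int archiveThenReplace(Configuration conf, HadoopArchives hadoopArchives,
      String appId, Path workingDir, Path remoteAppLogDir) throws Exception {
    String harName = appId + ".har";
    String[] haArgs = {"-archiveName", harName, "-p",
        remoteAppLogDir.toString(), "*", workingDir.toString()};

    // 1) Run the archiving job; if it fails, return without touching the logs.
    if (hadoopArchives.run(haArgs) != 0) {
      return -1;
    }

    FileSystem fs = FileSystem.get(conf);
    Path harPath = new Path(workingDir, harName);
    // 2) Treat a missing or empty HAR as a failure as well.
    if (!fs.exists(harPath) || fs.listStatus(harPath).length == 0) {
      return -1;
    }

    // 3) Only after both checks pass is the HAR moved into place; the
    //    original aggregated logs are deleted after this point.
    fs.rename(harPath, new Path(remoteAppLogDir, harName));
    return 0;
  }
}

The real patch returns -1 from runInternal() in the same two places, which is what the new testHadoopArchiveLogsWithArchiveError test asserts by injecting a failing HadoopArchives mock.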

HadoopArchiveLogsRunner.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.tools;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
 import org.apache.commons.cli.GnuParser;
@@ -65,6 +66,9 @@ public class HadoopArchiveLogsRunner implements Tool {
 
   private JobConf conf;
 
+  @VisibleForTesting
+  HadoopArchives hadoopArchives;
+
   private static final FsPermission HAR_DIR_PERM =
       new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.NONE);
   private static final FsPermission HAR_INNER_FILES_PERM =
@@ -72,6 +76,7 @@ public class HadoopArchiveLogsRunner implements Tool {
 
   public HadoopArchiveLogsRunner(Configuration conf) {
     setConf(conf);
+    hadoopArchives = new HadoopArchives(conf);
   }
 
   public static void main(String[] args) {
@@ -132,10 +137,10 @@ private int runInternal() throws Exception {
     conf.set("mapreduce.framework.name", "local");
     // Set the umask so we get 640 files and 750 dirs
     conf.set("fs.permissions.umask-mode", "027");
-    HadoopArchives ha = new HadoopArchives(conf);
+    String harName = appId + ".har";
     String[] haArgs = {
         "-archiveName",
-        appId + ".har",
+        harName,
         "-p",
         remoteAppLogDir,
         "*",
@@ -146,15 +151,26 @@
       sb.append("\n\t").append(haArg);
     }
     LOG.info(sb.toString());
-    ha.run(haArgs);
+    int exitCode = hadoopArchives.run(haArgs);
+    if (exitCode != 0) {
+      LOG.warn("Failed to create archives for " + appId);
+      return -1;
+    }
 
     FileSystem fs = null;
     // Move har file to correct location and delete original logs
     try {
       fs = FileSystem.get(conf);
-      Path harDest = new Path(remoteAppLogDir, appId + ".har");
+      Path harPath = new Path(workingDir, harName);
+      if (!fs.exists(harPath) ||
+          fs.listStatus(harPath).length == 0) {
+        LOG.warn("The created archive \"" + harName +
+            "\" is missing or empty.");
+        return -1;
+      }
+      Path harDest = new Path(remoteAppLogDir, harName);
       LOG.info("Moving har to original location");
-      fs.rename(new Path(workingDir, appId + ".har"), harDest);
+      fs.rename(harPath, harDest);
       LOG.info("Deleting original logs");
       for (FileStatus original : fs.listStatus(new Path(remoteAppLogDir),
           new PathFilter() {

TestHadoopArchiveLogsRunner.java

@@ -32,32 +32,50 @@
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.MiniYARNCluster;
+import org.junit.After;
 import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
+import org.mockito.Mockito;
 
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Random;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.*;
 
 public class TestHadoopArchiveLogsRunner {
 
   private static final int FILE_SIZE_INCREMENT = 4096;
+  private static final int[] FILE_SIZES = {3, 4, 2};
+  private static final int FILE_COUNT = FILE_SIZES.length;
   private static final byte[] DUMMY_DATA = new byte[FILE_SIZE_INCREMENT];
   static {
     new Random().nextBytes(DUMMY_DATA);
   }
 
-  @Test(timeout = 50000)
-  public void testHadoopArchiveLogs() throws Exception {
-    MiniDFSCluster dfsCluster = null;
-    FileSystem fs = null;
-    try (MiniYARNCluster yarnCluster =
-        new MiniYARNCluster(TestHadoopArchiveLogsRunner.class.getSimpleName(),
-            1, 2, 1, 1)) {
-      Configuration conf = new YarnConfiguration();
+  private Configuration conf;
+  private MiniDFSCluster dfsCluster;
+  private MiniYARNCluster yarnCluster;
+  private FileSystem fs;
+  private ApplicationId app1;
+  private Path app1Path;
+  private Path workingDir;
+  private Path remoteRootLogDir;
+  private String suffix;
+
+  @Rule
+  public Timeout globalTimeout = new Timeout(50000);
+
+  @Before
+  public void setup() throws Exception {
+    yarnCluster = new MiniYARNCluster(
+        TestHadoopArchiveLogsRunner.class.getSimpleName(), 1, 2, 1, 1);
+    conf = new YarnConfiguration();
     conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
     conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
     yarnCluster.init(conf);
@@ -66,78 +84,96 @@ public void testHadoopArchiveLogs() throws Exception {
     dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
     conf = new JobConf(conf);
 
-    ApplicationId app1 =
-        ApplicationId.newInstance(System.currentTimeMillis(), 1);
+    app1 = ApplicationId.newInstance(System.currentTimeMillis(), 1);
     fs = FileSystem.get(conf);
-    Path remoteRootLogDir = new Path(conf.get(
+    remoteRootLogDir = new Path(conf.get(
         YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
         YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
-    Path workingDir = new Path(remoteRootLogDir, "archive-logs-work");
-    String suffix = "logs";
+    workingDir = new Path(remoteRootLogDir, "archive-logs-work");
+    suffix = "logs";
     Path logDir = new Path(remoteRootLogDir,
         new Path(System.getProperty("user.name"), suffix));
     fs.mkdirs(logDir);
-    Path app1Path = new Path(logDir, app1.toString());
+    app1Path = new Path(logDir, app1.toString());
     fs.mkdirs(app1Path);
-    createFile(fs, new Path(app1Path, "log1"), 3);
-    createFile(fs, new Path(app1Path, "log2"), 4);
-    createFile(fs, new Path(app1Path, "log3"), 2);
+    for (int i = 0; i < FILE_COUNT; i++) {
+      createFile(fs, new Path(app1Path, "log" + (i + 1)), FILE_SIZES[i]);
+    }
     FileStatus[] app1Files = fs.listStatus(app1Path);
-    Assert.assertEquals(3, app1Files.length);
+    Assert.assertEquals(FILE_COUNT, app1Files.length);
+  }
 
-    String[] args = new String[]{
-        "-appId", app1.toString(),
-        "-user", System.getProperty("user.name"),
-        "-workingDir", workingDir.toString(),
-        "-remoteRootLogDir", remoteRootLogDir.toString(),
-        "-suffix", suffix};
+  @After
+  public void teardown() throws IOException {
+    if (fs != null) {
+      fs.close();
+    }
+    if (yarnCluster != null) {
+      yarnCluster.close();
+    }
+    if (dfsCluster != null) {
+      dfsCluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testHadoopArchiveLogs() throws Exception {
+    String[] args = getArgs();
     final HadoopArchiveLogsRunner halr = new HadoopArchiveLogsRunner(conf);
     assertEquals(0, ToolRunner.run(halr, args));
 
     fs = FileSystem.get(conf);
-    app1Files = fs.listStatus(app1Path);
+    FileStatus[] app1Files = fs.listStatus(app1Path);
     Assert.assertEquals(1, app1Files.length);
     FileStatus harFile = app1Files[0];
     Assert.assertEquals(app1.toString() + ".har", harFile.getPath().getName());
     Path harPath = new Path("har:///" + harFile.getPath().toUri().getRawPath());
     FileStatus[] harLogs = HarFs.get(harPath.toUri(), conf).listStatus(harPath);
-    Assert.assertEquals(3, harLogs.length);
+    Assert.assertEquals(FILE_COUNT, harLogs.length);
     Arrays.sort(harLogs, new Comparator<FileStatus>() {
       @Override
       public int compare(FileStatus o1, FileStatus o2) {
         return o1.getPath().getName().compareTo(o2.getPath().getName());
       }
     });
-    Assert.assertEquals("log1", harLogs[0].getPath().getName());
-    Assert.assertEquals(3 * FILE_SIZE_INCREMENT, harLogs[0].getLen());
-    Assert.assertEquals(
-        new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
-        harLogs[0].getPermission());
-    Assert.assertEquals(System.getProperty("user.name"),
-        harLogs[0].getOwner());
-    Assert.assertEquals("log2", harLogs[1].getPath().getName());
-    Assert.assertEquals(4 * FILE_SIZE_INCREMENT, harLogs[1].getLen());
-    Assert.assertEquals(
-        new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
-        harLogs[1].getPermission());
-    Assert.assertEquals(System.getProperty("user.name"),
-        harLogs[1].getOwner());
-    Assert.assertEquals("log3", harLogs[2].getPath().getName());
-    Assert.assertEquals(2 * FILE_SIZE_INCREMENT, harLogs[2].getLen());
-    Assert.assertEquals(
-        new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
-        harLogs[2].getPermission());
-    Assert.assertEquals(System.getProperty("user.name"),
-        harLogs[2].getOwner());
+    for (int i = 0; i < FILE_COUNT; i++) {
+      FileStatus harLog = harLogs[i];
+      Assert.assertEquals("log" + (i + 1), harLog.getPath().getName());
+      Assert.assertEquals(FILE_SIZES[i] * FILE_SIZE_INCREMENT, harLog.getLen());
+      Assert.assertEquals(
+          new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
+          harLog.getPermission());
+      Assert.assertEquals(System.getProperty("user.name"),
+          harLog.getOwner());
+    }
     Assert.assertEquals(0, fs.listStatus(workingDir).length);
-    } finally {
-      if (fs != null) {
-        fs.close();
-      }
-      if (dfsCluster != null) {
-        dfsCluster.shutdown();
-      }
-    }
   }
 
+  @Test
+  public void testHadoopArchiveLogsWithArchiveError() throws Exception {
+    String[] args = getArgs();
+    final HadoopArchiveLogsRunner halr = new HadoopArchiveLogsRunner(conf);
+    HadoopArchives mockHadoopArchives = mock(HadoopArchives.class);
+    when(mockHadoopArchives.run(Mockito.<String[]>any())).thenReturn(-1);
+    halr.hadoopArchives = mockHadoopArchives;
+    assertNotEquals(0, ToolRunner.run(halr, args));
+    // Make sure the original log files are intact
+    FileStatus[] app1Files = fs.listStatus(app1Path);
+    assertEquals(FILE_COUNT, app1Files.length);
+    for (int i = 0; i < FILE_COUNT; i++) {
+      Assert.assertEquals(FILE_SIZES[i] * FILE_SIZE_INCREMENT,
+          app1Files[i].getLen());
+    }
+  }
+
+  private String[] getArgs() {
+    return new String[]{
+        "-appId", app1.toString(),
+        "-user", System.getProperty("user.name"),
+        "-workingDir", workingDir.toString(),
+        "-remoteRootLogDir", remoteRootLogDir.toString(),
+        "-suffix", suffix};
+  }
 
   private static void createFile(FileSystem fs, Path p, long sizeMultiple)