From 82a750564631eb3077603bed27058006b3d66309 Mon Sep 17 00:00:00 2001
From: Adam Antal
Date: Tue, 25 Aug 2020 13:29:12 +0200
Subject: [PATCH] YARN-10304. Create an endpoint for remote application log
 directory path query. Contributed by Andras Gyori

---
 .../mapreduce/v2/hs/webapp/HsWebServices.java |  20 +++
 .../v2/hs/webapp/TestHsWebServicesLogs.java   | 128 +++++++++++++++++-
 .../LogAggregationFileController.java         |   8 ++
 .../hadoop/yarn/server/webapp/LogServlet.java |  51 +++++++
 .../server/webapp/YarnWebServiceParams.java   |   1 +
 .../server/webapp/dao/RemoteLogPathEntry.java |  53 ++++++++
 .../server/webapp/dao/RemoteLogPaths.java     |  50 +++++++
 7 files changed, 310 insertions(+), 1 deletion(-)
 create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/RemoteLogPathEntry.java
 create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/RemoteLogPaths.java

diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java
index 4ba8fe0b37..008edf5e57 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java
@@ -423,6 +423,26 @@ public JobTaskAttemptCounterInfo getJobTaskAttemptIdCounters(
     return new JobTaskAttemptCounterInfo(ta);
   }
 
+  /**
+   * Returns the user qualified path name of the remote log directory for
+   * each pre-configured log aggregation file controller.
+   *
+   * @param req HttpServletRequest
+   * @param user user the paths are qualified with (optional)
+   * @param appIdStr application id to resolve the log dir of (optional)
+   * @return Path names grouped by file controller name
+   */
+  @GET
+  @Path("/remote-log-dir")
+  @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML })
+  public Response getRemoteLogDirPath(@Context HttpServletRequest req,
+      @QueryParam(YarnWebServiceParams.REMOTE_USER) String user,
+      @QueryParam(YarnWebServiceParams.APP_ID) String appIdStr)
+      throws IOException {
+    init();
+    return logServlet.getRemoteLogDirPath(user, appIdStr);
+  }
+
   @GET
   @Path("/aggregatedlogs")
   @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML })
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesLogs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesLogs.java
index 22aa3acd9a..2b43e240dc 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesLogs.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesLogs.java
@@ -31,6 +31,7 @@
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
 import org.apache.hadoop.mapreduce.v2.hs.HistoryContext;
 import org.apache.hadoop.mapreduce.v2.hs.MockHistoryContext;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
@@ -48,9 +49,13 @@
 import org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType;
 import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo;
 import org.apache.hadoop.yarn.logaggregation.TestContainerLogsUtils;
+import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationFileController;
+import org.apache.hadoop.yarn.logaggregation.filecontroller.ifile.LogAggregationIndexedFileController;
 import org.apache.hadoop.yarn.server.webapp.LogServlet;
 import org.apache.hadoop.yarn.server.webapp.YarnWebServiceParams;
 import org.apache.hadoop.yarn.server.webapp.dao.ContainerLogsInfo;
+import org.apache.hadoop.yarn.server.webapp.dao.RemoteLogPathEntry;
+import org.apache.hadoop.yarn.server.webapp.dao.RemoteLogPaths;
 import org.apache.hadoop.yarn.webapp.BadRequestException;
 import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
 import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
@@ -68,6 +73,8 @@
 import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -76,6 +83,8 @@
 import java.util.stream.Collectors;
 
 import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.Mockito.doAnswer;
@@ -116,6 +125,9 @@ public class TestHsWebServicesLogs extends JerseyTestBase {
   private static final String USER = "fakeUser";
   private static final String FILE_NAME = "syslog";
 
+  private static final String REMOTE_LOG_DIR_SUFFIX = "test-logs";
+  private static final String[] FILE_FORMATS = {"IFile", "TFile"};
+
   private static final String NM_WEBADDRESS_1 = "test-nm-web-address-1:9999";
   private static final NodeId NM_ID_1 = NodeId.newInstance("fakeHost1", 9951);
   private static final String NM_WEBADDRESS_2 = "test-nm-web-address-2:9999";
@@ -156,6 +168,17 @@ public class TestHsWebServicesLogs extends JerseyTestBase {
   }
 
   private static class WebServletModule extends ServletModule {
+    private Configuration newConf;
+
+    WebServletModule() {
+      super();
+    }
+
+    WebServletModule(Configuration newConf) {
+      super();
+      this.newConf = newConf;
+    }
+
     @Override
     protected void configureServlets() {
       MockHistoryContext appContext = new MockHistoryContext(0, 1, 2, 1);
@@ -199,8 +222,9 @@ protected void configureServlets() {
         fail("Failed to setup WebServletModule class");
       }
 
+      Configuration usedConf = newConf == null ? conf : newConf;
       HsWebServices hsWebServices =
-          new HsWebServices(appContext, conf, webApp, mockProtocol);
+          new HsWebServices(appContext, usedConf, webApp, mockProtocol);
       try {
         LogServlet logServlet = hsWebServices.getLogServlet();
         logServlet = spy(logServlet);
@@ -576,6 +600,92 @@ public void testGetContainerLogFileForRunningContainer() throws Exception {
         + ContainerLogAggregationType.AGGREGATED, "Hello-" + CONTAINER_2_2_3);
   }
 
+  @Test
+  public void testRemoteLogDirWithUser() {
+    createReconfiguredServlet();
+
+    WebResource r = resource();
+    ClientResponse response = r.path("ws").path("v1")
+        .path("history").path("remote-log-dir")
+        .queryParam(YarnWebServiceParams.REMOTE_USER,
+            USER)
+        .accept(MediaType.APPLICATION_JSON)
+        .get(ClientResponse.class);
+    RemoteLogPaths res = response.
+        getEntity(new GenericType<RemoteLogPaths>(){});
+
+    List<String> collectedControllerNames = new ArrayList<>();
+    for (RemoteLogPathEntry entry: res.getPaths()) {
+      String path = String.format("%s/%s/bucket-%s-%s",
+          YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR, USER,
+          REMOTE_LOG_DIR_SUFFIX, entry.getFileController().toLowerCase());
+      collectedControllerNames.add(entry.getFileController());
+      assertEquals(path, entry.getPath());
+    }
+
+    assertTrue(collectedControllerNames.containsAll(
+        Arrays.asList(FILE_FORMATS)));
+  }
+
+  @Test
+  public void testRemoteLogDir() {
+    createReconfiguredServlet();
+    UserGroupInformation ugi = UserGroupInformation.
+        createRemoteUser(USER);
+    UserGroupInformation.setLoginUser(ugi);
+
+    WebResource r = resource();
+    ClientResponse response = r.path("ws").path("v1")
+        .path("history").path("remote-log-dir")
+        .accept(MediaType.APPLICATION_JSON)
+        .get(ClientResponse.class);
+    RemoteLogPaths res = response.
+        getEntity(new GenericType<RemoteLogPaths>(){});
+
+    List<String> collectedControllerNames = new ArrayList<>();
+    for (RemoteLogPathEntry entry: res.getPaths()) {
+      String path = String.format("%s/%s/bucket-%s-%s",
+          YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR, USER,
+          REMOTE_LOG_DIR_SUFFIX, entry.getFileController().toLowerCase());
+      collectedControllerNames.add(entry.getFileController());
+      assertEquals(path, entry.getPath());
+    }
+
+    assertTrue(collectedControllerNames.containsAll(
+        Arrays.asList(FILE_FORMATS)));
+  }
+
+  @Test
+  public void testRemoteLogDirWithUserAndAppId() {
+    createReconfiguredServlet();
+
+    WebResource r = resource();
+    ClientResponse response = r.path("ws").path("v1")
+        .path("history").path("remote-log-dir")
+        .queryParam(YarnWebServiceParams.REMOTE_USER,
+            USER)
+        .queryParam(YarnWebServiceParams.APP_ID,
+            APPID_1.toString())
+        .accept(MediaType.APPLICATION_JSON)
+        .get(ClientResponse.class);
+    RemoteLogPaths res = response.
+        getEntity(new GenericType<RemoteLogPaths>(){});
+
+    List<String> collectedControllerNames = new ArrayList<>();
+    for (RemoteLogPathEntry entry: res.getPaths()) {
+      String path = String.format("%s/%s/bucket-%s-%s/0001/%s",
+          YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR, USER,
+          REMOTE_LOG_DIR_SUFFIX, entry.getFileController().toLowerCase(),
+          APPID_1.toString());
+      collectedControllerNames.add(entry.getFileController());
+      assertEquals(path, entry.getPath());
+    }
+
+    assertTrue(collectedControllerNames.containsAll(
+        Arrays.asList(FILE_FORMATS)));
+  }
+
+
   @Test
   public void testNonExistingAppId() {
     ApplicationId nonExistingApp = ApplicationId.newInstance(99, 99);
@@ -763,4 +873,20 @@ private static String getRedirectURL(String url) throws Exception {
     }
     return null;
   }
+
+  private void createReconfiguredServlet() {
+    Configuration newConf = new YarnConfiguration();
+    newConf.setStrings(YarnConfiguration.LOG_AGGREGATION_FILE_FORMATS,
+        FILE_FORMATS);
+    newConf.setClass(String.format(
+        YarnConfiguration.LOG_AGGREGATION_FILE_CONTROLLER_FMT, "IFile"),
+        LogAggregationIndexedFileController.class,
+        LogAggregationFileController.class);
+    newConf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
+        YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR);
+    newConf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR_SUFFIX,
+        REMOTE_LOG_DIR_SUFFIX);
+    GuiceServletConfig.setInjector(
+        Guice.createInjector(new WebServletModule(newConf)));
+  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java
index 2bf5f4e6a4..9c609beb59 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java
@@ -157,6 +157,14 @@ public String getRemoteRootLogDirSuffix() {
     return this.remoteRootLogDirSuffix;
   }
 
+  /**
+   * Get the name of the file controller.
+   * @return name of the file controller.
+   */
+  public String getFileControllerName() {
+    return this.fileControllerName;
+  }
+
   /**
    * Initialize the writer.
    * @param context the {@link LogAggregationFileControllerContext}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/LogServlet.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/LogServlet.java
index 33de8df011..b4f9a1f898 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/LogServlet.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/LogServlet.java
@@ -24,13 +24,19 @@
 import com.sun.jersey.api.client.UniformInterfaceException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl;
 import org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType;
 import org.apache.hadoop.yarn.logaggregation.ContainerLogMeta;
+import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils;
+import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationFileController;
 import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationFileControllerFactory;
 import org.apache.hadoop.yarn.server.webapp.dao.ContainerLogsInfo;
+import org.apache.hadoop.yarn.server.webapp.dao.RemoteLogPathEntry;
+import org.apache.hadoop.yarn.server.webapp.dao.RemoteLogPaths;
 import org.apache.hadoop.yarn.util.Apps;
 import org.apache.hadoop.yarn.webapp.BadRequestException;
 import org.apache.hadoop.yarn.webapp.NotFoundException;
@@ -45,6 +51,7 @@
 import javax.ws.rs.core.Response;
 import javax.ws.rs.core.Response.Status;
 import javax.ws.rs.core.StreamingOutput;
+import java.io.IOException;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -174,6 +181,50 @@ private void validateUserInput(ApplicationId applicationId,
     }
   }
 
+  /**
+   * Returns the user qualified path name of the remote log directory for
+   * each pre-configured log aggregation file controller.
+   *
+   * @param user user the paths are built for; current user if {@code null}
+   * @param applicationId application id string; may be {@code null}
+   * @return {@link Response} object containing remote log dir path names
+   * @throws IOException propagated from user lookup or path resolution
+   */
+  public Response getRemoteLogDirPath(String user, String applicationId)
+      throws IOException {
+    String remoteUser = user;
+    ApplicationId appId = applicationId != null ?
+        ApplicationIdPBImpl.fromString(applicationId) : null;
+
+    if (remoteUser == null) {
+      UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
+      remoteUser = ugi.getUserName();
+    }
+
+    List<LogAggregationFileController> fileControllers =
+        getOrCreateFactory().getConfiguredLogAggregationFileControllerList();
+    List<RemoteLogPathEntry> paths = new ArrayList<>();
+
+    for (LogAggregationFileController fileController : fileControllers) {
+      String path;
+      if (appId != null) {
+        path = fileController.getRemoteAppLogDir(appId, remoteUser).toString();
+      } else {
+        path = LogAggregationUtils.getRemoteLogSuffixedDir(
+            fileController.getRemoteRootLogDir(),
+            remoteUser, fileController.getRemoteRootLogDirSuffix()).toString();
+      }
+
+      paths.add(new RemoteLogPathEntry(fileController.getFileControllerName(),
+          path));
+    }
+
+    RemoteLogPaths result = new RemoteLogPaths(paths);
+    Response.ResponseBuilder response = Response.ok().entity(result);
+    response.header("X-Content-Type-Options", "nosniff");
+    return response.build();
+  }
+
   public Response getLogsInfo(HttpServletRequest hsr, String appIdStr,
       String appAttemptIdStr, String containerIdStr, String nmId,
       boolean redirectedFromNode, boolean manualRedirection) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/YarnWebServiceParams.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/YarnWebServiceParams.java
index 0d9e9f68c1..3aade3faaf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/YarnWebServiceParams.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/YarnWebServiceParams.java
@@ -39,4 +39,5 @@ public interface YarnWebServiceParams {
   String REDIRECTED_FROM_NODE = "redirected_from_node";
   String CLUSTER_ID = "clusterid";
   String MANUAL_REDIRECTION = "manual_redirection";
+  String REMOTE_USER = "user";
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/RemoteLogPathEntry.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/RemoteLogPathEntry.java
new file mode 100644
index 0000000000..76a1b8afa9
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/RemoteLogPathEntry.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.webapp.dao;
+
+/**
+ * A remote log path for a log aggregation file controller.
+ * <pre>
+ *   &lt;ROOT_PATH&gt;/%USER/&lt;SUFFIX&gt;
+ * </pre>
+ */
+public class RemoteLogPathEntry {
+  private String fileController;
+  private String path;
+
+  //JAXB needs this
+  public RemoteLogPathEntry() {}
+
+  public RemoteLogPathEntry(String fileController, String path) {
+    this.fileController = fileController;
+    this.path = path;
+  }
+
+  public String getFileController() {
+    return fileController;
+  }
+
+  public void setFileController(String fileController) {
+    this.fileController = fileController;
+  }
+
+  public String getPath() {
+    return path;
+  }
+
+  public void setPath(String path) {
+    this.path = path;
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/RemoteLogPaths.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/RemoteLogPaths.java
new file mode 100644
index 0000000000..80354fa0d6
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/RemoteLogPaths.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.webapp.dao;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import java.util.List;
+
+/**
+ * Container of a list of {@link RemoteLogPathEntry}.
+ */
+@XmlRootElement(name = "remoteLogDirPathResult")
+@XmlAccessorType(XmlAccessType.FIELD)
+public class RemoteLogPaths {
+
+  @XmlElement(name = "paths")
+  private List<RemoteLogPathEntry> paths;
+
+  //JAXB needs this
+  public RemoteLogPaths() {}
+
+  public RemoteLogPaths(List<RemoteLogPathEntry> paths) {
+    this.paths = paths;
+  }
+
+  public List<RemoteLogPathEntry> getPaths() {
+    return paths;
+  }
+
+  public void setPaths(List<RemoteLogPathEntry> paths) {
+    this.paths = paths;
+  }
+}