From 4d5dd75b607d25adf8b41f7408713dfcea8f5330 Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Tue, 3 Oct 2017 10:58:28 -0700 Subject: [PATCH] HADOOP-14908. CrossOriginFilter should trigger regex on more input (Johannes Alberti via aw) --- .../security/http/CrossOriginFilter.java | 27 +++++-- .../src/main/resources/core-default.xml | 12 ++- .../src/site/markdown/HttpAuthentication.md | 2 +- .../security/http/TestCrossOriginFilter.java | 79 +++++++++++++++++++ .../src/site/markdown/TimelineServer.md | 32 ++++---- 5 files changed, 125 insertions(+), 27 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/CrossOriginFilter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/CrossOriginFilter.java index 58d50cf972..34d9fe2b70 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/CrossOriginFilter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/CrossOriginFilter.java @@ -37,6 +37,7 @@ import org.apache.commons.lang.StringUtils; import com.google.common.annotations.VisibleForTesting; +import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,6 +67,7 @@ public class CrossOriginFilter implements Filter { // Filter configuration public static final String ALLOWED_ORIGINS = "allowed-origins"; public static final String ALLOWED_ORIGINS_DEFAULT = "*"; + public static final String ALLOWED_ORIGINS_REGEX_PREFIX = "regex:"; public static final String ALLOWED_METHODS = "allowed-methods"; public static final String ALLOWED_METHODS_DEFAULT = "GET,POST,HEAD"; public static final String ALLOWED_HEADERS = "allowed-headers"; @@ -194,6 +196,12 @@ private void initializeAllowedOrigins(FilterConfig filterConfig) { allowAllOrigins = allowedOrigins.contains("*"); LOG.info("Allowed Origins: " + StringUtils.join(allowedOrigins, ',')); LOG.info("Allow All Origins: " + 
allowAllOrigins); + List discouragedAllowedOrigins = allowedOrigins.stream() + .filter(s -> s.length() > 1 && s.contains("*")) + .collect(Collectors.toList()); + for (String discouragedAllowedOrigin : discouragedAllowedOrigins) { + LOG.warn("Allowed Origin pattern '" + discouragedAllowedOrigin + "' is discouraged, use the 'regex:' prefix and use a Java regular expression instead."); + } } private void initializeMaxAge(FilterConfig filterConfig) { @@ -228,15 +236,20 @@ boolean areOriginsAllowed(String originsList) { String[] origins = originsList.trim().split("\\s+"); for (String origin : origins) { for (String allowedOrigin : allowedOrigins) { - if (allowedOrigin.contains("*")) { - String regex = allowedOrigin.replace(".", "\\.").replace("*", ".*"); - Pattern p = Pattern.compile(regex); - Matcher m = p.matcher(origin); - if (m.matches()) { + Pattern regexPattern = null; + if (allowedOrigin.startsWith(ALLOWED_ORIGINS_REGEX_PREFIX)) { + String regex = allowedOrigin.substring(ALLOWED_ORIGINS_REGEX_PREFIX.length()); + regexPattern = Pattern.compile(regex); + } else if (allowedOrigin.contains("*")) { + String regex = allowedOrigin.replace(".", "\\.").replace("*", ".*"); + regexPattern = Pattern.compile(regex); + } + + if (regexPattern != null + && regexPattern.matcher(origin).matches()) { return true; - } } else if (allowedOrigin.equals(origin)) { - return true; + return true; } } } diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index a11e7c3eea..d481048710 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1861,9 +1861,15 @@ hadoop.http.cross-origin.allowed-origins * - Comma separated list of origins that are allowed for web - services needing cross-origin (CORS) support. 
Wildcards (*) and patterns - allowed + Comma separated list of origins that are allowed for web services + needing cross-origin (CORS) support. If a value in the list contains an + asterisk (*), a regex pattern, escaping any dots ('.' -> '\.') and replacing + the asterisk such that it captures any characters ('*' -> '.*'), is generated. + Values prefixed with 'regex:' are interpreted directly as regular expressions, + e.g. use the expression 'regex:https?:\/\/foo\.bar(:[0-9]+)?' to allow any + origin using the 'http' or 'https' protocol in the domain 'foo.bar' on any + port. The use of simple wildcards ('*') is discouraged, and only available for + backward compatibility. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md b/hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md index d4eeb0e845..44d814c89d 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md @@ -60,7 +60,7 @@ Add org.apache.hadoop.security.HttpCrossOriginFilterInitializer to hadoop.http.f | Property | Default Value | Description | |:---------------------------------------- |:--------------------------------------------- |:------------------------------------------------------------------------------------- | | hadoop.http.cross-origin.enabled | `false` | Enables cross origin support for all web-services | -| hadoop.http.cross-origin.allowed-origins | `*` | Comma separated list of origins that are allowed, wildcards (`*`) and patterns allowed | +| hadoop.http.cross-origin.allowed-origins | `*` | Comma separated list of origins that are allowed. Values prefixed with `regex:` are interpreted as regular expressions. Values containing wildcards (`*`) are also accepted and are converted into a regular expression, but their use is discouraged and supported only for backward compatibility.
| | hadoop.http.cross-origin.allowed-methods | `GET,POST,HEAD` | Comma separated list of methods that are allowed | | hadoop.http.cross-origin.allowed-headers | `X-Requested-With,Content-Type,Accept,Origin` | Comma separated list of headers that are allowed | | hadoop.http.cross-origin.max-age | `1800` | Number of seconds a pre-flighted request can be cached | diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/http/TestCrossOriginFilter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/http/TestCrossOriginFilter.java index 5c9b413e6e..b9662b8c6a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/http/TestCrossOriginFilter.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/http/TestCrossOriginFilter.java @@ -127,6 +127,85 @@ public void testPatternMatchingOrigins() throws ServletException, IOException { Assert.assertFalse(filter.areOriginsAllowed("foo.nomatch1.com foo.nomatch2.com")); } + @Test + public void testRegexPatternMatchingOrigins() throws ServletException, IOException { + + // Setup the configuration settings of the server + Map conf = new HashMap(); + conf.put(CrossOriginFilter.ALLOWED_ORIGINS, "regex:.*[.]example[.]com"); + FilterConfig filterConfig = new FilterConfigTest(conf); + + // Object under test + CrossOriginFilter filter = new CrossOriginFilter(); + filter.init(filterConfig); + + // match multiple sub-domains + Assert.assertFalse(filter.areOriginsAllowed("example.com")); + Assert.assertFalse(filter.areOriginsAllowed("foo:example.com")); + Assert.assertTrue(filter.areOriginsAllowed("foo.example.com")); + Assert.assertTrue(filter.areOriginsAllowed("foo.bar.example.com")); + + // First origin is allowed + Assert.assertTrue(filter.areOriginsAllowed("foo.example.com foo.nomatch.com")); + // Second origin is allowed + Assert.assertTrue(filter.areOriginsAllowed("foo.nomatch.com foo.example.com")); + // No 
origin in list is allowed + Assert.assertFalse(filter.areOriginsAllowed("foo.nomatch1.com foo.nomatch2.com")); + } + + @Test + public void testComplexRegexPatternMatchingOrigins() throws ServletException, IOException { + + // Setup the configuration settings of the server + Map conf = new HashMap(); + conf.put(CrossOriginFilter.ALLOWED_ORIGINS, "regex:https?:\\/\\/sub1[.]example[.]com(:[0-9]+)?"); + FilterConfig filterConfig = new FilterConfigTest(conf); + + // Object under test + CrossOriginFilter filter = new CrossOriginFilter(); + filter.init(filterConfig); + + Assert.assertTrue(filter.areOriginsAllowed("http://sub1.example.com")); + Assert.assertTrue(filter.areOriginsAllowed("https://sub1.example.com")); + Assert.assertTrue(filter.areOriginsAllowed("http://sub1.example.com:1234")); + Assert.assertTrue(filter.areOriginsAllowed("https://sub1.example.com:8080")); + + // No origin in list is allowed + Assert.assertFalse(filter.areOriginsAllowed("foo.nomatch1.com foo.nomatch2.com")); + } + + @Test + public void testMixedRegexPatternMatchingOrigins() throws ServletException, IOException { + + // Setup the configuration settings of the server + Map conf = new HashMap(); + conf.put(CrossOriginFilter.ALLOWED_ORIGINS, "regex:https?:\\/\\/sub1[.]example[.]com(:[0-9]+)?, " + + "*.example2.com"); + FilterConfig filterConfig = new FilterConfigTest(conf); + + // Object under test + CrossOriginFilter filter = new CrossOriginFilter(); + filter.init(filterConfig); + + Assert.assertTrue(filter.areOriginsAllowed("http://sub1.example.com")); + Assert.assertTrue(filter.areOriginsAllowed("https://sub1.example.com")); + Assert.assertTrue(filter.areOriginsAllowed("http://sub1.example.com:1234")); + Assert.assertTrue(filter.areOriginsAllowed("https://sub1.example.com:8080")); + + // match multiple sub-domains + Assert.assertFalse(filter.areOriginsAllowed("example2.com")); + Assert.assertFalse(filter.areOriginsAllowed("foo:example2.com")); + 
Assert.assertTrue(filter.areOriginsAllowed("foo.example2.com")); + Assert.assertTrue(filter.areOriginsAllowed("foo.bar.example2.com")); + + // First origin is allowed + Assert.assertTrue(filter.areOriginsAllowed("foo.example2.com foo.nomatch.com")); + // Second origin is allowed + Assert.assertTrue(filter.areOriginsAllowed("foo.nomatch.com foo.example2.com")); + // No origin in list is allowed + Assert.assertFalse(filter.areOriginsAllowed("foo.nomatch1.com foo.nomatch2.com")); + } + @Test public void testDisallowedOrigin() throws ServletException, IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/TimelineServer.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/TimelineServer.md index f610cdefe4..a326f45d9f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/TimelineServer.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/TimelineServer.md @@ -41,9 +41,9 @@ Previously this was supported purely for MapReduce jobs by the Application Histo With the introduction of the timeline server, the Application History Server becomes just one use of the Timeline Server. -Generic information includes application level data such as +Generic information includes application level data such as -* queue-name, +* queue-name, * user information and the like set in the `ApplicationSubmissionContext`, * a list of application-attempts that ran for an application * information about each application-attempt @@ -68,7 +68,7 @@ Current status 1. The "Timeline Server v1" REST API has been declared one of the REST APIs whose compatibility will be maintained in future releases. 1. The single-server implementation of the Timeline Server places a limit on - the scalability of the service; it also prevents the service being + the scalability of the service; it also prevents the service being High-Availability component of the YARN infrastructure. 
Future Plans @@ -88,9 +88,9 @@ data structures as well as the ability of the client to failover between Timelin The Timeline Domain offers a namespace for Timeline server allowing users to host multiple entities, isolating them from other users and applications. Timeline server Security is defined at this level. - + A "Domain" primarily stores owner info, read and& write ACL information, -created and modified time stamp information. Each Domain is identified by an ID which +created and modified time stamp information. Each Domain is identified by an ID which must be unique across all users in the YARN cluster. #### Timeline Entity @@ -111,7 +111,7 @@ Each Entity is uniquely identified by an `EntityId` and `EntityType`. #### Timeline Events A Timeline Event describes an event that is related to a specific -Timeline Entity of an application. +Timeline Entity of an application. Users are free to define what an event means —such as starting an application, getting allocated a container, @@ -156,7 +156,7 @@ and cluster operators. | `yarn.timeline-service.webapp.https.address` | The https address of the Timeline service web application. Defaults to `${yarn.timeline-service.hostname}:8190`. | | `yarn.timeline-service.bind-host` | The actual address the server will bind to. If this optional address is set, the RPC and webapp servers will bind to this address and the port specified in `yarn.timeline-service.address` and `yarn.timeline-service.webapp.address`, respectively. This is most useful for making the service listen on all interfaces by setting to `0.0.0.0`. | | `yarn.timeline-service.http-cross-origin.enabled` | Enables cross-origin support (CORS) for web services where cross-origin web response headers are needed. For example, javascript making a web services request to the timeline server. Defaults to `false`. 
| -| `yarn.timeline-service.http-cross-origin.allowed-origins` | Comma separated list of origins that are allowed for web services needing cross-origin (CORS) support. Wildcards `(*)` and patterns allowed. Defaults to `*`. | +| `yarn.timeline-service.http-cross-origin.allowed-origins` | Comma separated list of origins that are allowed. Values prefixed with `regex:` are interpreted as regular expressions. Values containing wildcards (`*`) are also accepted and are converted into a regular expression, but their use is discouraged and supported only for backward compatibility. Defaults to `*`. | | `yarn.timeline-service.http-cross-origin.allowed-methods` | Comma separated list of methods that are allowed for web services needing cross-origin (CORS) support. Defaults to `GET,POST,HEAD`. | | `yarn.timeline-service.http-cross-origin.allowed-headers` | Comma separated list of headers that are allowed for web services needing cross-origin (CORS) support. Defaults to `X-Requested-With,Content-Type,Accept,Origin`. | | `yarn.timeline-service.http-cross-origin.max-age` | The number of seconds a pre-flighted request can be cached for web services needing cross-origin (CORS) support. Defaults to `1800`. | @@ -420,7 +420,7 @@ response: `TimelinePutResponse` ### List domains of a user: GET `/ws/v1/timeline/domain` -Retrieves a list of all domains of a user. +Retrieves a list of all domains of a user. If an owner is specified, that owner name overrides that of the caller. @@ -598,8 +598,8 @@ Request Body: Required fields Entity: `type` and `id`. `starttime` is required unless the -entity contains one or more event). -Event: `type` and `timestamp`.
## Timeline Entity List @@ -809,7 +809,7 @@ Response Body: } ] } - + @@ -1443,8 +1443,8 @@ None ### Elements of the `appattempts` (Application Attempt List) Object When you make a request for the list of application attempts, the information -will be returned as a collection of application attempt objects. See -[Application Attempt](#REST_API_APPLICATION_ATTEMPT) for the syntax of +will be returned as a collection of application attempt objects. See +[Application Attempt](#REST_API_APPLICATION_ATTEMPT) for the syntax of the application attempt object. | Item | Data Type | Description | @@ -1758,7 +1758,7 @@ Response Body: } ] } - + #### XML response @@ -2004,8 +2004,8 @@ Response Body: COMPLETE http://localhost:8042 - - + + ### Response Codes 1. Queries where a domain, entity type, entity ID or similar cannot be resolved result in